xref: /dragonfly/sys/dev/drm/radeon/cik.c (revision a1626531)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include "drmP.h"
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include "radeon_audio.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35 #include "radeon_kfd.h"
36 
37 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
46 
47 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
48 MODULE_FIRMWARE("radeon/bonaire_me.bin");
49 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
50 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
51 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
52 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
53 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
54 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
55 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
56 
57 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
66 
67 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
68 MODULE_FIRMWARE("radeon/hawaii_me.bin");
69 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
70 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
71 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
72 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
73 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
74 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
75 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
76 
77 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
80 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
81 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
82 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
83 
84 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
85 MODULE_FIRMWARE("radeon/kaveri_me.bin");
86 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
87 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
88 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
89 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
90 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
91 
92 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
93 MODULE_FIRMWARE("radeon/KABINI_me.bin");
94 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
95 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
96 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
97 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
98 
99 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
100 MODULE_FIRMWARE("radeon/kabini_me.bin");
101 MODULE_FIRMWARE("radeon/kabini_ce.bin");
102 MODULE_FIRMWARE("radeon/kabini_mec.bin");
103 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
104 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
105 
106 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
109 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
110 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
111 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
112 
113 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
114 MODULE_FIRMWARE("radeon/mullins_me.bin");
115 MODULE_FIRMWARE("radeon/mullins_ce.bin");
116 MODULE_FIRMWARE("radeon/mullins_mec.bin");
117 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
118 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
119 
120 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
121 static void cik_rlc_stop(struct radeon_device *rdev);
122 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
123 static void cik_program_aspm(struct radeon_device *rdev);
124 static void cik_init_pg(struct radeon_device *rdev);
125 static void cik_init_cg(struct radeon_device *rdev);
126 static void cik_fini_pg(struct radeon_device *rdev);
127 static void cik_fini_cg(struct radeon_device *rdev);
128 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
129 					  bool enable);
130 
131 /**
132  * cik_get_allowed_info_register - fetch the register for the info ioctl
133  *
134  * @rdev: radeon_device pointer
135  * @reg: register offset in bytes
136  * @val: register value
137  *
138  * Returns 0 for success or -EINVAL for an invalid register
139  *
140  */
141 int cik_get_allowed_info_register(struct radeon_device *rdev,
142 				  u32 reg, u32 *val)
143 {
144 	switch (reg) {
145 	case GRBM_STATUS:
146 	case GRBM_STATUS2:
147 	case GRBM_STATUS_SE0:
148 	case GRBM_STATUS_SE1:
149 	case GRBM_STATUS_SE2:
150 	case GRBM_STATUS_SE3:
151 	case SRBM_STATUS:
152 	case SRBM_STATUS2:
153 	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
154 	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
155 	case UVD_STATUS:
156 	/* TODO VCE */
157 		*val = RREG32(reg);
158 		return 0;
159 	default:
160 		return -EINVAL;
161 	}
162 }
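/*
 * Illustrative caller sketch (not part of this file): the info ioctl
 * path is expected to validate a userspace-supplied offset through this
 * allow-list before touching the hardware, along these lines:
 */
#if 0
	u32 value;

	if (cik_get_allowed_info_register(rdev, GRBM_STATUS, &value) == 0)
		DRM_INFO("GRBM_STATUS: 0x%08x\n", value);
	else
		DRM_DEBUG("offset is not on the read allow-list\n");
#endif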
163 
164 /*
165  * DIDT indirect register accessors
166  */
167 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
168 {
169 	u32 r;
170 
171 	lockmgr(&rdev->didt_idx_lock, LK_EXCLUSIVE);
172 	WREG32(CIK_DIDT_IND_INDEX, (reg));
173 	r = RREG32(CIK_DIDT_IND_DATA);
174 	lockmgr(&rdev->didt_idx_lock, LK_RELEASE);
175 	return r;
176 }
177 
178 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
179 {
180 	lockmgr(&rdev->didt_idx_lock, LK_EXCLUSIVE);
181 	WREG32(CIK_DIDT_IND_INDEX, (reg));
182 	WREG32(CIK_DIDT_IND_DATA, (v));
183 	lockmgr(&rdev->didt_idx_lock, LK_RELEASE);
184 }
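/*
 * Example (hypothetical helper, built only on the accessors above): a
 * read-modify-write of a DIDT indirect register.  Note that each call
 * takes and drops didt_idx_lock, so the sequence as a whole is not
 * atomic against other DIDT users.
 */
#if 0
static void cik_didt_rmw(struct radeon_device *rdev, u32 reg,
			 u32 clear, u32 set)
{
	u32 tmp = cik_didt_rreg(rdev, reg);

	tmp &= ~clear;
	tmp |= set;
	cik_didt_wreg(rdev, reg, tmp);
}
#endif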
185 
186 /* get temperature in millidegrees */
187 int ci_get_temp(struct radeon_device *rdev)
188 {
189 	u32 temp;
190 	int actual_temp = 0;
191 
192 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
193 		CTF_TEMP_SHIFT;
194 
195 	if (temp & 0x200)	/* out-of-range reading: clamp */
196 		actual_temp = 255;
197 	else
198 		actual_temp = temp & 0x1ff;	/* 9-bit value, degrees C */
199 
200 	actual_temp = actual_temp * 1000;
201 
202 	return actual_temp;
203 }
204 
205 /* get temperature in millidegrees */
206 int kv_get_temp(struct radeon_device *rdev)
207 {
208 	u32 temp;
209 	int actual_temp = 0;
210 
211 	temp = RREG32_SMC(0xC0300E0C);	/* raw sensor readout (undocumented SMC offset) */
212 
213 	if (temp)
214 		actual_temp = (temp / 8) - 49;
215 	else
216 		actual_temp = 0;
217 
218 	actual_temp = actual_temp * 1000;
219 
220 	return actual_temp;
221 }
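/*
 * Both helpers above report millidegrees Celsius (the hwmon
 * temp*_input convention).  A minimal consumer sketch (hypothetical;
 * the real driver picks the helper via the ASIC function table):
 */
#if 0
	int mC = (rdev->flags & RADEON_IS_IGP) ?
		kv_get_temp(rdev) : ci_get_temp(rdev);

	DRM_INFO("GPU temp: %d.%03d degrees C\n", mC / 1000, abs(mC) % 1000);
#endif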
222 
223 /*
224  * PCIE port indirect register accessors
225  */
226 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
227 {
228 	u32 r;
229 
230 	lockmgr(&rdev->pciep_idx_lock, LK_EXCLUSIVE);
231 	WREG32(PCIE_INDEX, reg);
232 	(void)RREG32(PCIE_INDEX); /* posting read: flush the index write */
233 	r = RREG32(PCIE_DATA);
234 	lockmgr(&rdev->pciep_idx_lock, LK_RELEASE);
235 	return r;
236 }
237 
238 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
239 {
240 	lockmgr(&rdev->pciep_idx_lock, LK_EXCLUSIVE);
241 	WREG32(PCIE_INDEX, reg);
242 	(void)RREG32(PCIE_INDEX); /* posting read: flush the index write */
243 	WREG32(PCIE_DATA, v);
244 	(void)RREG32(PCIE_DATA); /* flush the data write as well */
245 	lockmgr(&rdev->pciep_idx_lock, LK_RELEASE);
246 }
247 
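/*
 * RLC save/restore register lists.  Layout (inferred from the data, not
 * from documentation): each entry packs a GRBM broadcast selector into
 * the high 16 bits and a dword register offset (byte offset >> 2) into
 * the low bits, followed by a 0x00000000 placeholder for the saved
 * value; the bare counts (0x3, 0x5) introduce sub-blocks of the list.
 */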
248 static const u32 spectre_rlc_save_restore_register_list[] =
249 {
250 	(0x0e00 << 16) | (0xc12c >> 2),
251 	0x00000000,
252 	(0x0e00 << 16) | (0xc140 >> 2),
253 	0x00000000,
254 	(0x0e00 << 16) | (0xc150 >> 2),
255 	0x00000000,
256 	(0x0e00 << 16) | (0xc15c >> 2),
257 	0x00000000,
258 	(0x0e00 << 16) | (0xc168 >> 2),
259 	0x00000000,
260 	(0x0e00 << 16) | (0xc170 >> 2),
261 	0x00000000,
262 	(0x0e00 << 16) | (0xc178 >> 2),
263 	0x00000000,
264 	(0x0e00 << 16) | (0xc204 >> 2),
265 	0x00000000,
266 	(0x0e00 << 16) | (0xc2b4 >> 2),
267 	0x00000000,
268 	(0x0e00 << 16) | (0xc2b8 >> 2),
269 	0x00000000,
270 	(0x0e00 << 16) | (0xc2bc >> 2),
271 	0x00000000,
272 	(0x0e00 << 16) | (0xc2c0 >> 2),
273 	0x00000000,
274 	(0x0e00 << 16) | (0x8228 >> 2),
275 	0x00000000,
276 	(0x0e00 << 16) | (0x829c >> 2),
277 	0x00000000,
278 	(0x0e00 << 16) | (0x869c >> 2),
279 	0x00000000,
280 	(0x0600 << 16) | (0x98f4 >> 2),
281 	0x00000000,
282 	(0x0e00 << 16) | (0x98f8 >> 2),
283 	0x00000000,
284 	(0x0e00 << 16) | (0x9900 >> 2),
285 	0x00000000,
286 	(0x0e00 << 16) | (0xc260 >> 2),
287 	0x00000000,
288 	(0x0e00 << 16) | (0x90e8 >> 2),
289 	0x00000000,
290 	(0x0e00 << 16) | (0x3c000 >> 2),
291 	0x00000000,
292 	(0x0e00 << 16) | (0x3c00c >> 2),
293 	0x00000000,
294 	(0x0e00 << 16) | (0x8c1c >> 2),
295 	0x00000000,
296 	(0x0e00 << 16) | (0x9700 >> 2),
297 	0x00000000,
298 	(0x0e00 << 16) | (0xcd20 >> 2),
299 	0x00000000,
300 	(0x4e00 << 16) | (0xcd20 >> 2),
301 	0x00000000,
302 	(0x5e00 << 16) | (0xcd20 >> 2),
303 	0x00000000,
304 	(0x6e00 << 16) | (0xcd20 >> 2),
305 	0x00000000,
306 	(0x7e00 << 16) | (0xcd20 >> 2),
307 	0x00000000,
308 	(0x8e00 << 16) | (0xcd20 >> 2),
309 	0x00000000,
310 	(0x9e00 << 16) | (0xcd20 >> 2),
311 	0x00000000,
312 	(0xae00 << 16) | (0xcd20 >> 2),
313 	0x00000000,
314 	(0xbe00 << 16) | (0xcd20 >> 2),
315 	0x00000000,
316 	(0x0e00 << 16) | (0x89bc >> 2),
317 	0x00000000,
318 	(0x0e00 << 16) | (0x8900 >> 2),
319 	0x00000000,
320 	0x3,
321 	(0x0e00 << 16) | (0xc130 >> 2),
322 	0x00000000,
323 	(0x0e00 << 16) | (0xc134 >> 2),
324 	0x00000000,
325 	(0x0e00 << 16) | (0xc1fc >> 2),
326 	0x00000000,
327 	(0x0e00 << 16) | (0xc208 >> 2),
328 	0x00000000,
329 	(0x0e00 << 16) | (0xc264 >> 2),
330 	0x00000000,
331 	(0x0e00 << 16) | (0xc268 >> 2),
332 	0x00000000,
333 	(0x0e00 << 16) | (0xc26c >> 2),
334 	0x00000000,
335 	(0x0e00 << 16) | (0xc270 >> 2),
336 	0x00000000,
337 	(0x0e00 << 16) | (0xc274 >> 2),
338 	0x00000000,
339 	(0x0e00 << 16) | (0xc278 >> 2),
340 	0x00000000,
341 	(0x0e00 << 16) | (0xc27c >> 2),
342 	0x00000000,
343 	(0x0e00 << 16) | (0xc280 >> 2),
344 	0x00000000,
345 	(0x0e00 << 16) | (0xc284 >> 2),
346 	0x00000000,
347 	(0x0e00 << 16) | (0xc288 >> 2),
348 	0x00000000,
349 	(0x0e00 << 16) | (0xc28c >> 2),
350 	0x00000000,
351 	(0x0e00 << 16) | (0xc290 >> 2),
352 	0x00000000,
353 	(0x0e00 << 16) | (0xc294 >> 2),
354 	0x00000000,
355 	(0x0e00 << 16) | (0xc298 >> 2),
356 	0x00000000,
357 	(0x0e00 << 16) | (0xc29c >> 2),
358 	0x00000000,
359 	(0x0e00 << 16) | (0xc2a0 >> 2),
360 	0x00000000,
361 	(0x0e00 << 16) | (0xc2a4 >> 2),
362 	0x00000000,
363 	(0x0e00 << 16) | (0xc2a8 >> 2),
364 	0x00000000,
365 	(0x0e00 << 16) | (0xc2ac >> 2),
366 	0x00000000,
367 	(0x0e00 << 16) | (0xc2b0 >> 2),
368 	0x00000000,
369 	(0x0e00 << 16) | (0x301d0 >> 2),
370 	0x00000000,
371 	(0x0e00 << 16) | (0x30238 >> 2),
372 	0x00000000,
373 	(0x0e00 << 16) | (0x30250 >> 2),
374 	0x00000000,
375 	(0x0e00 << 16) | (0x30254 >> 2),
376 	0x00000000,
377 	(0x0e00 << 16) | (0x30258 >> 2),
378 	0x00000000,
379 	(0x0e00 << 16) | (0x3025c >> 2),
380 	0x00000000,
381 	(0x4e00 << 16) | (0xc900 >> 2),
382 	0x00000000,
383 	(0x5e00 << 16) | (0xc900 >> 2),
384 	0x00000000,
385 	(0x6e00 << 16) | (0xc900 >> 2),
386 	0x00000000,
387 	(0x7e00 << 16) | (0xc900 >> 2),
388 	0x00000000,
389 	(0x8e00 << 16) | (0xc900 >> 2),
390 	0x00000000,
391 	(0x9e00 << 16) | (0xc900 >> 2),
392 	0x00000000,
393 	(0xae00 << 16) | (0xc900 >> 2),
394 	0x00000000,
395 	(0xbe00 << 16) | (0xc900 >> 2),
396 	0x00000000,
397 	(0x4e00 << 16) | (0xc904 >> 2),
398 	0x00000000,
399 	(0x5e00 << 16) | (0xc904 >> 2),
400 	0x00000000,
401 	(0x6e00 << 16) | (0xc904 >> 2),
402 	0x00000000,
403 	(0x7e00 << 16) | (0xc904 >> 2),
404 	0x00000000,
405 	(0x8e00 << 16) | (0xc904 >> 2),
406 	0x00000000,
407 	(0x9e00 << 16) | (0xc904 >> 2),
408 	0x00000000,
409 	(0xae00 << 16) | (0xc904 >> 2),
410 	0x00000000,
411 	(0xbe00 << 16) | (0xc904 >> 2),
412 	0x00000000,
413 	(0x4e00 << 16) | (0xc908 >> 2),
414 	0x00000000,
415 	(0x5e00 << 16) | (0xc908 >> 2),
416 	0x00000000,
417 	(0x6e00 << 16) | (0xc908 >> 2),
418 	0x00000000,
419 	(0x7e00 << 16) | (0xc908 >> 2),
420 	0x00000000,
421 	(0x8e00 << 16) | (0xc908 >> 2),
422 	0x00000000,
423 	(0x9e00 << 16) | (0xc908 >> 2),
424 	0x00000000,
425 	(0xae00 << 16) | (0xc908 >> 2),
426 	0x00000000,
427 	(0xbe00 << 16) | (0xc908 >> 2),
428 	0x00000000,
429 	(0x4e00 << 16) | (0xc90c >> 2),
430 	0x00000000,
431 	(0x5e00 << 16) | (0xc90c >> 2),
432 	0x00000000,
433 	(0x6e00 << 16) | (0xc90c >> 2),
434 	0x00000000,
435 	(0x7e00 << 16) | (0xc90c >> 2),
436 	0x00000000,
437 	(0x8e00 << 16) | (0xc90c >> 2),
438 	0x00000000,
439 	(0x9e00 << 16) | (0xc90c >> 2),
440 	0x00000000,
441 	(0xae00 << 16) | (0xc90c >> 2),
442 	0x00000000,
443 	(0xbe00 << 16) | (0xc90c >> 2),
444 	0x00000000,
445 	(0x4e00 << 16) | (0xc910 >> 2),
446 	0x00000000,
447 	(0x5e00 << 16) | (0xc910 >> 2),
448 	0x00000000,
449 	(0x6e00 << 16) | (0xc910 >> 2),
450 	0x00000000,
451 	(0x7e00 << 16) | (0xc910 >> 2),
452 	0x00000000,
453 	(0x8e00 << 16) | (0xc910 >> 2),
454 	0x00000000,
455 	(0x9e00 << 16) | (0xc910 >> 2),
456 	0x00000000,
457 	(0xae00 << 16) | (0xc910 >> 2),
458 	0x00000000,
459 	(0xbe00 << 16) | (0xc910 >> 2),
460 	0x00000000,
461 	(0x0e00 << 16) | (0xc99c >> 2),
462 	0x00000000,
463 	(0x0e00 << 16) | (0x9834 >> 2),
464 	0x00000000,
465 	(0x0000 << 16) | (0x30f00 >> 2),
466 	0x00000000,
467 	(0x0001 << 16) | (0x30f00 >> 2),
468 	0x00000000,
469 	(0x0000 << 16) | (0x30f04 >> 2),
470 	0x00000000,
471 	(0x0001 << 16) | (0x30f04 >> 2),
472 	0x00000000,
473 	(0x0000 << 16) | (0x30f08 >> 2),
474 	0x00000000,
475 	(0x0001 << 16) | (0x30f08 >> 2),
476 	0x00000000,
477 	(0x0000 << 16) | (0x30f0c >> 2),
478 	0x00000000,
479 	(0x0001 << 16) | (0x30f0c >> 2),
480 	0x00000000,
481 	(0x0600 << 16) | (0x9b7c >> 2),
482 	0x00000000,
483 	(0x0e00 << 16) | (0x8a14 >> 2),
484 	0x00000000,
485 	(0x0e00 << 16) | (0x8a18 >> 2),
486 	0x00000000,
487 	(0x0600 << 16) | (0x30a00 >> 2),
488 	0x00000000,
489 	(0x0e00 << 16) | (0x8bf0 >> 2),
490 	0x00000000,
491 	(0x0e00 << 16) | (0x8bcc >> 2),
492 	0x00000000,
493 	(0x0e00 << 16) | (0x8b24 >> 2),
494 	0x00000000,
495 	(0x0e00 << 16) | (0x30a04 >> 2),
496 	0x00000000,
497 	(0x0600 << 16) | (0x30a10 >> 2),
498 	0x00000000,
499 	(0x0600 << 16) | (0x30a14 >> 2),
500 	0x00000000,
501 	(0x0600 << 16) | (0x30a18 >> 2),
502 	0x00000000,
503 	(0x0600 << 16) | (0x30a2c >> 2),
504 	0x00000000,
505 	(0x0e00 << 16) | (0xc700 >> 2),
506 	0x00000000,
507 	(0x0e00 << 16) | (0xc704 >> 2),
508 	0x00000000,
509 	(0x0e00 << 16) | (0xc708 >> 2),
510 	0x00000000,
511 	(0x0e00 << 16) | (0xc768 >> 2),
512 	0x00000000,
513 	(0x0400 << 16) | (0xc770 >> 2),
514 	0x00000000,
515 	(0x0400 << 16) | (0xc774 >> 2),
516 	0x00000000,
517 	(0x0400 << 16) | (0xc778 >> 2),
518 	0x00000000,
519 	(0x0400 << 16) | (0xc77c >> 2),
520 	0x00000000,
521 	(0x0400 << 16) | (0xc780 >> 2),
522 	0x00000000,
523 	(0x0400 << 16) | (0xc784 >> 2),
524 	0x00000000,
525 	(0x0400 << 16) | (0xc788 >> 2),
526 	0x00000000,
527 	(0x0400 << 16) | (0xc78c >> 2),
528 	0x00000000,
529 	(0x0400 << 16) | (0xc798 >> 2),
530 	0x00000000,
531 	(0x0400 << 16) | (0xc79c >> 2),
532 	0x00000000,
533 	(0x0400 << 16) | (0xc7a0 >> 2),
534 	0x00000000,
535 	(0x0400 << 16) | (0xc7a4 >> 2),
536 	0x00000000,
537 	(0x0400 << 16) | (0xc7a8 >> 2),
538 	0x00000000,
539 	(0x0400 << 16) | (0xc7ac >> 2),
540 	0x00000000,
541 	(0x0400 << 16) | (0xc7b0 >> 2),
542 	0x00000000,
543 	(0x0400 << 16) | (0xc7b4 >> 2),
544 	0x00000000,
545 	(0x0e00 << 16) | (0x9100 >> 2),
546 	0x00000000,
547 	(0x0e00 << 16) | (0x3c010 >> 2),
548 	0x00000000,
549 	(0x0e00 << 16) | (0x92a8 >> 2),
550 	0x00000000,
551 	(0x0e00 << 16) | (0x92ac >> 2),
552 	0x00000000,
553 	(0x0e00 << 16) | (0x92b4 >> 2),
554 	0x00000000,
555 	(0x0e00 << 16) | (0x92b8 >> 2),
556 	0x00000000,
557 	(0x0e00 << 16) | (0x92bc >> 2),
558 	0x00000000,
559 	(0x0e00 << 16) | (0x92c0 >> 2),
560 	0x00000000,
561 	(0x0e00 << 16) | (0x92c4 >> 2),
562 	0x00000000,
563 	(0x0e00 << 16) | (0x92c8 >> 2),
564 	0x00000000,
565 	(0x0e00 << 16) | (0x92cc >> 2),
566 	0x00000000,
567 	(0x0e00 << 16) | (0x92d0 >> 2),
568 	0x00000000,
569 	(0x0e00 << 16) | (0x8c00 >> 2),
570 	0x00000000,
571 	(0x0e00 << 16) | (0x8c04 >> 2),
572 	0x00000000,
573 	(0x0e00 << 16) | (0x8c20 >> 2),
574 	0x00000000,
575 	(0x0e00 << 16) | (0x8c38 >> 2),
576 	0x00000000,
577 	(0x0e00 << 16) | (0x8c3c >> 2),
578 	0x00000000,
579 	(0x0e00 << 16) | (0xae00 >> 2),
580 	0x00000000,
581 	(0x0e00 << 16) | (0x9604 >> 2),
582 	0x00000000,
583 	(0x0e00 << 16) | (0xac08 >> 2),
584 	0x00000000,
585 	(0x0e00 << 16) | (0xac0c >> 2),
586 	0x00000000,
587 	(0x0e00 << 16) | (0xac10 >> 2),
588 	0x00000000,
589 	(0x0e00 << 16) | (0xac14 >> 2),
590 	0x00000000,
591 	(0x0e00 << 16) | (0xac58 >> 2),
592 	0x00000000,
593 	(0x0e00 << 16) | (0xac68 >> 2),
594 	0x00000000,
595 	(0x0e00 << 16) | (0xac6c >> 2),
596 	0x00000000,
597 	(0x0e00 << 16) | (0xac70 >> 2),
598 	0x00000000,
599 	(0x0e00 << 16) | (0xac74 >> 2),
600 	0x00000000,
601 	(0x0e00 << 16) | (0xac78 >> 2),
602 	0x00000000,
603 	(0x0e00 << 16) | (0xac7c >> 2),
604 	0x00000000,
605 	(0x0e00 << 16) | (0xac80 >> 2),
606 	0x00000000,
607 	(0x0e00 << 16) | (0xac84 >> 2),
608 	0x00000000,
609 	(0x0e00 << 16) | (0xac88 >> 2),
610 	0x00000000,
611 	(0x0e00 << 16) | (0xac8c >> 2),
612 	0x00000000,
613 	(0x0e00 << 16) | (0x970c >> 2),
614 	0x00000000,
615 	(0x0e00 << 16) | (0x9714 >> 2),
616 	0x00000000,
617 	(0x0e00 << 16) | (0x9718 >> 2),
618 	0x00000000,
619 	(0x0e00 << 16) | (0x971c >> 2),
620 	0x00000000,
621 	(0x0e00 << 16) | (0x31068 >> 2),
622 	0x00000000,
623 	(0x4e00 << 16) | (0x31068 >> 2),
624 	0x00000000,
625 	(0x5e00 << 16) | (0x31068 >> 2),
626 	0x00000000,
627 	(0x6e00 << 16) | (0x31068 >> 2),
628 	0x00000000,
629 	(0x7e00 << 16) | (0x31068 >> 2),
630 	0x00000000,
631 	(0x8e00 << 16) | (0x31068 >> 2),
632 	0x00000000,
633 	(0x9e00 << 16) | (0x31068 >> 2),
634 	0x00000000,
635 	(0xae00 << 16) | (0x31068 >> 2),
636 	0x00000000,
637 	(0xbe00 << 16) | (0x31068 >> 2),
638 	0x00000000,
639 	(0x0e00 << 16) | (0xcd10 >> 2),
640 	0x00000000,
641 	(0x0e00 << 16) | (0xcd14 >> 2),
642 	0x00000000,
643 	(0x0e00 << 16) | (0x88b0 >> 2),
644 	0x00000000,
645 	(0x0e00 << 16) | (0x88b4 >> 2),
646 	0x00000000,
647 	(0x0e00 << 16) | (0x88b8 >> 2),
648 	0x00000000,
649 	(0x0e00 << 16) | (0x88bc >> 2),
650 	0x00000000,
651 	(0x0400 << 16) | (0x89c0 >> 2),
652 	0x00000000,
653 	(0x0e00 << 16) | (0x88c4 >> 2),
654 	0x00000000,
655 	(0x0e00 << 16) | (0x88c8 >> 2),
656 	0x00000000,
657 	(0x0e00 << 16) | (0x88d0 >> 2),
658 	0x00000000,
659 	(0x0e00 << 16) | (0x88d4 >> 2),
660 	0x00000000,
661 	(0x0e00 << 16) | (0x88d8 >> 2),
662 	0x00000000,
663 	(0x0e00 << 16) | (0x8980 >> 2),
664 	0x00000000,
665 	(0x0e00 << 16) | (0x30938 >> 2),
666 	0x00000000,
667 	(0x0e00 << 16) | (0x3093c >> 2),
668 	0x00000000,
669 	(0x0e00 << 16) | (0x30940 >> 2),
670 	0x00000000,
671 	(0x0e00 << 16) | (0x89a0 >> 2),
672 	0x00000000,
673 	(0x0e00 << 16) | (0x30900 >> 2),
674 	0x00000000,
675 	(0x0e00 << 16) | (0x30904 >> 2),
676 	0x00000000,
677 	(0x0e00 << 16) | (0x89b4 >> 2),
678 	0x00000000,
679 	(0x0e00 << 16) | (0x3c210 >> 2),
680 	0x00000000,
681 	(0x0e00 << 16) | (0x3c214 >> 2),
682 	0x00000000,
683 	(0x0e00 << 16) | (0x3c218 >> 2),
684 	0x00000000,
685 	(0x0e00 << 16) | (0x8904 >> 2),
686 	0x00000000,
687 	0x5,
688 	(0x0e00 << 16) | (0x8c28 >> 2),
689 	(0x0e00 << 16) | (0x8c2c >> 2),
690 	(0x0e00 << 16) | (0x8c30 >> 2),
691 	(0x0e00 << 16) | (0x8c34 >> 2),
692 	(0x0e00 << 16) | (0x9600 >> 2),
693 };
694 
695 static const u32 kalindi_rlc_save_restore_register_list[] =
696 {
697 	(0x0e00 << 16) | (0xc12c >> 2),
698 	0x00000000,
699 	(0x0e00 << 16) | (0xc140 >> 2),
700 	0x00000000,
701 	(0x0e00 << 16) | (0xc150 >> 2),
702 	0x00000000,
703 	(0x0e00 << 16) | (0xc15c >> 2),
704 	0x00000000,
705 	(0x0e00 << 16) | (0xc168 >> 2),
706 	0x00000000,
707 	(0x0e00 << 16) | (0xc170 >> 2),
708 	0x00000000,
709 	(0x0e00 << 16) | (0xc204 >> 2),
710 	0x00000000,
711 	(0x0e00 << 16) | (0xc2b4 >> 2),
712 	0x00000000,
713 	(0x0e00 << 16) | (0xc2b8 >> 2),
714 	0x00000000,
715 	(0x0e00 << 16) | (0xc2bc >> 2),
716 	0x00000000,
717 	(0x0e00 << 16) | (0xc2c0 >> 2),
718 	0x00000000,
719 	(0x0e00 << 16) | (0x8228 >> 2),
720 	0x00000000,
721 	(0x0e00 << 16) | (0x829c >> 2),
722 	0x00000000,
723 	(0x0e00 << 16) | (0x869c >> 2),
724 	0x00000000,
725 	(0x0600 << 16) | (0x98f4 >> 2),
726 	0x00000000,
727 	(0x0e00 << 16) | (0x98f8 >> 2),
728 	0x00000000,
729 	(0x0e00 << 16) | (0x9900 >> 2),
730 	0x00000000,
731 	(0x0e00 << 16) | (0xc260 >> 2),
732 	0x00000000,
733 	(0x0e00 << 16) | (0x90e8 >> 2),
734 	0x00000000,
735 	(0x0e00 << 16) | (0x3c000 >> 2),
736 	0x00000000,
737 	(0x0e00 << 16) | (0x3c00c >> 2),
738 	0x00000000,
739 	(0x0e00 << 16) | (0x8c1c >> 2),
740 	0x00000000,
741 	(0x0e00 << 16) | (0x9700 >> 2),
742 	0x00000000,
743 	(0x0e00 << 16) | (0xcd20 >> 2),
744 	0x00000000,
745 	(0x4e00 << 16) | (0xcd20 >> 2),
746 	0x00000000,
747 	(0x5e00 << 16) | (0xcd20 >> 2),
748 	0x00000000,
749 	(0x6e00 << 16) | (0xcd20 >> 2),
750 	0x00000000,
751 	(0x7e00 << 16) | (0xcd20 >> 2),
752 	0x00000000,
753 	(0x0e00 << 16) | (0x89bc >> 2),
754 	0x00000000,
755 	(0x0e00 << 16) | (0x8900 >> 2),
756 	0x00000000,
757 	0x3,
758 	(0x0e00 << 16) | (0xc130 >> 2),
759 	0x00000000,
760 	(0x0e00 << 16) | (0xc134 >> 2),
761 	0x00000000,
762 	(0x0e00 << 16) | (0xc1fc >> 2),
763 	0x00000000,
764 	(0x0e00 << 16) | (0xc208 >> 2),
765 	0x00000000,
766 	(0x0e00 << 16) | (0xc264 >> 2),
767 	0x00000000,
768 	(0x0e00 << 16) | (0xc268 >> 2),
769 	0x00000000,
770 	(0x0e00 << 16) | (0xc26c >> 2),
771 	0x00000000,
772 	(0x0e00 << 16) | (0xc270 >> 2),
773 	0x00000000,
774 	(0x0e00 << 16) | (0xc274 >> 2),
775 	0x00000000,
776 	(0x0e00 << 16) | (0xc28c >> 2),
777 	0x00000000,
778 	(0x0e00 << 16) | (0xc290 >> 2),
779 	0x00000000,
780 	(0x0e00 << 16) | (0xc294 >> 2),
781 	0x00000000,
782 	(0x0e00 << 16) | (0xc298 >> 2),
783 	0x00000000,
784 	(0x0e00 << 16) | (0xc2a0 >> 2),
785 	0x00000000,
786 	(0x0e00 << 16) | (0xc2a4 >> 2),
787 	0x00000000,
788 	(0x0e00 << 16) | (0xc2a8 >> 2),
789 	0x00000000,
790 	(0x0e00 << 16) | (0xc2ac >> 2),
791 	0x00000000,
792 	(0x0e00 << 16) | (0x301d0 >> 2),
793 	0x00000000,
794 	(0x0e00 << 16) | (0x30238 >> 2),
795 	0x00000000,
796 	(0x0e00 << 16) | (0x30250 >> 2),
797 	0x00000000,
798 	(0x0e00 << 16) | (0x30254 >> 2),
799 	0x00000000,
800 	(0x0e00 << 16) | (0x30258 >> 2),
801 	0x00000000,
802 	(0x0e00 << 16) | (0x3025c >> 2),
803 	0x00000000,
804 	(0x4e00 << 16) | (0xc900 >> 2),
805 	0x00000000,
806 	(0x5e00 << 16) | (0xc900 >> 2),
807 	0x00000000,
808 	(0x6e00 << 16) | (0xc900 >> 2),
809 	0x00000000,
810 	(0x7e00 << 16) | (0xc900 >> 2),
811 	0x00000000,
812 	(0x4e00 << 16) | (0xc904 >> 2),
813 	0x00000000,
814 	(0x5e00 << 16) | (0xc904 >> 2),
815 	0x00000000,
816 	(0x6e00 << 16) | (0xc904 >> 2),
817 	0x00000000,
818 	(0x7e00 << 16) | (0xc904 >> 2),
819 	0x00000000,
820 	(0x4e00 << 16) | (0xc908 >> 2),
821 	0x00000000,
822 	(0x5e00 << 16) | (0xc908 >> 2),
823 	0x00000000,
824 	(0x6e00 << 16) | (0xc908 >> 2),
825 	0x00000000,
826 	(0x7e00 << 16) | (0xc908 >> 2),
827 	0x00000000,
828 	(0x4e00 << 16) | (0xc90c >> 2),
829 	0x00000000,
830 	(0x5e00 << 16) | (0xc90c >> 2),
831 	0x00000000,
832 	(0x6e00 << 16) | (0xc90c >> 2),
833 	0x00000000,
834 	(0x7e00 << 16) | (0xc90c >> 2),
835 	0x00000000,
836 	(0x4e00 << 16) | (0xc910 >> 2),
837 	0x00000000,
838 	(0x5e00 << 16) | (0xc910 >> 2),
839 	0x00000000,
840 	(0x6e00 << 16) | (0xc910 >> 2),
841 	0x00000000,
842 	(0x7e00 << 16) | (0xc910 >> 2),
843 	0x00000000,
844 	(0x0e00 << 16) | (0xc99c >> 2),
845 	0x00000000,
846 	(0x0e00 << 16) | (0x9834 >> 2),
847 	0x00000000,
848 	(0x0000 << 16) | (0x30f00 >> 2),
849 	0x00000000,
850 	(0x0000 << 16) | (0x30f04 >> 2),
851 	0x00000000,
852 	(0x0000 << 16) | (0x30f08 >> 2),
853 	0x00000000,
854 	(0x0000 << 16) | (0x30f0c >> 2),
855 	0x00000000,
856 	(0x0600 << 16) | (0x9b7c >> 2),
857 	0x00000000,
858 	(0x0e00 << 16) | (0x8a14 >> 2),
859 	0x00000000,
860 	(0x0e00 << 16) | (0x8a18 >> 2),
861 	0x00000000,
862 	(0x0600 << 16) | (0x30a00 >> 2),
863 	0x00000000,
864 	(0x0e00 << 16) | (0x8bf0 >> 2),
865 	0x00000000,
866 	(0x0e00 << 16) | (0x8bcc >> 2),
867 	0x00000000,
868 	(0x0e00 << 16) | (0x8b24 >> 2),
869 	0x00000000,
870 	(0x0e00 << 16) | (0x30a04 >> 2),
871 	0x00000000,
872 	(0x0600 << 16) | (0x30a10 >> 2),
873 	0x00000000,
874 	(0x0600 << 16) | (0x30a14 >> 2),
875 	0x00000000,
876 	(0x0600 << 16) | (0x30a18 >> 2),
877 	0x00000000,
878 	(0x0600 << 16) | (0x30a2c >> 2),
879 	0x00000000,
880 	(0x0e00 << 16) | (0xc700 >> 2),
881 	0x00000000,
882 	(0x0e00 << 16) | (0xc704 >> 2),
883 	0x00000000,
884 	(0x0e00 << 16) | (0xc708 >> 2),
885 	0x00000000,
886 	(0x0e00 << 16) | (0xc768 >> 2),
887 	0x00000000,
888 	(0x0400 << 16) | (0xc770 >> 2),
889 	0x00000000,
890 	(0x0400 << 16) | (0xc774 >> 2),
891 	0x00000000,
892 	(0x0400 << 16) | (0xc798 >> 2),
893 	0x00000000,
894 	(0x0400 << 16) | (0xc79c >> 2),
895 	0x00000000,
896 	(0x0e00 << 16) | (0x9100 >> 2),
897 	0x00000000,
898 	(0x0e00 << 16) | (0x3c010 >> 2),
899 	0x00000000,
900 	(0x0e00 << 16) | (0x8c00 >> 2),
901 	0x00000000,
902 	(0x0e00 << 16) | (0x8c04 >> 2),
903 	0x00000000,
904 	(0x0e00 << 16) | (0x8c20 >> 2),
905 	0x00000000,
906 	(0x0e00 << 16) | (0x8c38 >> 2),
907 	0x00000000,
908 	(0x0e00 << 16) | (0x8c3c >> 2),
909 	0x00000000,
910 	(0x0e00 << 16) | (0xae00 >> 2),
911 	0x00000000,
912 	(0x0e00 << 16) | (0x9604 >> 2),
913 	0x00000000,
914 	(0x0e00 << 16) | (0xac08 >> 2),
915 	0x00000000,
916 	(0x0e00 << 16) | (0xac0c >> 2),
917 	0x00000000,
918 	(0x0e00 << 16) | (0xac10 >> 2),
919 	0x00000000,
920 	(0x0e00 << 16) | (0xac14 >> 2),
921 	0x00000000,
922 	(0x0e00 << 16) | (0xac58 >> 2),
923 	0x00000000,
924 	(0x0e00 << 16) | (0xac68 >> 2),
925 	0x00000000,
926 	(0x0e00 << 16) | (0xac6c >> 2),
927 	0x00000000,
928 	(0x0e00 << 16) | (0xac70 >> 2),
929 	0x00000000,
930 	(0x0e00 << 16) | (0xac74 >> 2),
931 	0x00000000,
932 	(0x0e00 << 16) | (0xac78 >> 2),
933 	0x00000000,
934 	(0x0e00 << 16) | (0xac7c >> 2),
935 	0x00000000,
936 	(0x0e00 << 16) | (0xac80 >> 2),
937 	0x00000000,
938 	(0x0e00 << 16) | (0xac84 >> 2),
939 	0x00000000,
940 	(0x0e00 << 16) | (0xac88 >> 2),
941 	0x00000000,
942 	(0x0e00 << 16) | (0xac8c >> 2),
943 	0x00000000,
944 	(0x0e00 << 16) | (0x970c >> 2),
945 	0x00000000,
946 	(0x0e00 << 16) | (0x9714 >> 2),
947 	0x00000000,
948 	(0x0e00 << 16) | (0x9718 >> 2),
949 	0x00000000,
950 	(0x0e00 << 16) | (0x971c >> 2),
951 	0x00000000,
952 	(0x0e00 << 16) | (0x31068 >> 2),
953 	0x00000000,
954 	(0x4e00 << 16) | (0x31068 >> 2),
955 	0x00000000,
956 	(0x5e00 << 16) | (0x31068 >> 2),
957 	0x00000000,
958 	(0x6e00 << 16) | (0x31068 >> 2),
959 	0x00000000,
960 	(0x7e00 << 16) | (0x31068 >> 2),
961 	0x00000000,
962 	(0x0e00 << 16) | (0xcd10 >> 2),
963 	0x00000000,
964 	(0x0e00 << 16) | (0xcd14 >> 2),
965 	0x00000000,
966 	(0x0e00 << 16) | (0x88b0 >> 2),
967 	0x00000000,
968 	(0x0e00 << 16) | (0x88b4 >> 2),
969 	0x00000000,
970 	(0x0e00 << 16) | (0x88b8 >> 2),
971 	0x00000000,
972 	(0x0e00 << 16) | (0x88bc >> 2),
973 	0x00000000,
974 	(0x0400 << 16) | (0x89c0 >> 2),
975 	0x00000000,
976 	(0x0e00 << 16) | (0x88c4 >> 2),
977 	0x00000000,
978 	(0x0e00 << 16) | (0x88c8 >> 2),
979 	0x00000000,
980 	(0x0e00 << 16) | (0x88d0 >> 2),
981 	0x00000000,
982 	(0x0e00 << 16) | (0x88d4 >> 2),
983 	0x00000000,
984 	(0x0e00 << 16) | (0x88d8 >> 2),
985 	0x00000000,
986 	(0x0e00 << 16) | (0x8980 >> 2),
987 	0x00000000,
988 	(0x0e00 << 16) | (0x30938 >> 2),
989 	0x00000000,
990 	(0x0e00 << 16) | (0x3093c >> 2),
991 	0x00000000,
992 	(0x0e00 << 16) | (0x30940 >> 2),
993 	0x00000000,
994 	(0x0e00 << 16) | (0x89a0 >> 2),
995 	0x00000000,
996 	(0x0e00 << 16) | (0x30900 >> 2),
997 	0x00000000,
998 	(0x0e00 << 16) | (0x30904 >> 2),
999 	0x00000000,
1000 	(0x0e00 << 16) | (0x89b4 >> 2),
1001 	0x00000000,
1002 	(0x0e00 << 16) | (0x3e1fc >> 2),
1003 	0x00000000,
1004 	(0x0e00 << 16) | (0x3c210 >> 2),
1005 	0x00000000,
1006 	(0x0e00 << 16) | (0x3c214 >> 2),
1007 	0x00000000,
1008 	(0x0e00 << 16) | (0x3c218 >> 2),
1009 	0x00000000,
1010 	(0x0e00 << 16) | (0x8904 >> 2),
1011 	0x00000000,
1012 	0x5,
1013 	(0x0e00 << 16) | (0x8c28 >> 2),
1014 	(0x0e00 << 16) | (0x8c2c >> 2),
1015 	(0x0e00 << 16) | (0x8c30 >> 2),
1016 	(0x0e00 << 16) | (0x8c34 >> 2),
1017 	(0x0e00 << 16) | (0x9600 >> 2),
1018 };
1019 
1020 static const u32 bonaire_golden_spm_registers[] =
1021 {
1022 	0x30800, 0xe0ffffff, 0xe0000000
1023 };
1024 
1025 static const u32 bonaire_golden_common_registers[] =
1026 {
1027 	0xc770, 0xffffffff, 0x00000800,
1028 	0xc774, 0xffffffff, 0x00000800,
1029 	0xc798, 0xffffffff, 0x00007fbf,
1030 	0xc79c, 0xffffffff, 0x00007faf
1031 };
1032 
1033 static const u32 bonaire_golden_registers[] =
1034 {
1035 	0x3354, 0x00000333, 0x00000333,
1036 	0x3350, 0x000c0fc0, 0x00040200,
1037 	0x9a10, 0x00010000, 0x00058208,
1038 	0x3c000, 0xffff1fff, 0x00140000,
1039 	0x3c200, 0xfdfc0fff, 0x00000100,
1040 	0x3c234, 0x40000000, 0x40000200,
1041 	0x9830, 0xffffffff, 0x00000000,
1042 	0x9834, 0xf00fffff, 0x00000400,
1043 	0x9838, 0x0002021c, 0x00020200,
1044 	0xc78, 0x00000080, 0x00000000,
1045 	0x5bb0, 0x000000f0, 0x00000070,
1046 	0x5bc0, 0xf0311fff, 0x80300000,
1047 	0x98f8, 0x73773777, 0x12010001,
1048 	0x350c, 0x00810000, 0x408af000,
1049 	0x7030, 0x31000111, 0x00000011,
1050 	0x2f48, 0x73773777, 0x12010001,
1051 	0x220c, 0x00007fb6, 0x0021a1b1,
1052 	0x2210, 0x00007fb6, 0x002021b1,
1053 	0x2180, 0x00007fb6, 0x00002191,
1054 	0x2218, 0x00007fb6, 0x002121b1,
1055 	0x221c, 0x00007fb6, 0x002021b1,
1056 	0x21dc, 0x00007fb6, 0x00002191,
1057 	0x21e0, 0x00007fb6, 0x00002191,
1058 	0x3628, 0x0000003f, 0x0000000a,
1059 	0x362c, 0x0000003f, 0x0000000a,
1060 	0x2ae4, 0x00073ffe, 0x000022a2,
1061 	0x240c, 0x000007ff, 0x00000000,
1062 	0x8a14, 0xf000003f, 0x00000007,
1063 	0x8bf0, 0x00002001, 0x00000001,
1064 	0x8b24, 0xffffffff, 0x00ffffff,
1065 	0x30a04, 0x0000ff0f, 0x00000000,
1066 	0x28a4c, 0x07ffffff, 0x06000000,
1067 	0x4d8, 0x00000fff, 0x00000100,
1068 	0x3e78, 0x00000001, 0x00000002,
1069 	0x9100, 0x03000000, 0x0362c688,
1070 	0x8c00, 0x000000ff, 0x00000001,
1071 	0xe40, 0x00001fff, 0x00001fff,
1072 	0x9060, 0x0000007f, 0x00000020,
1073 	0x9508, 0x00010000, 0x00010000,
1074 	0xac14, 0x000003ff, 0x000000f3,
1075 	0xac0c, 0xffffffff, 0x00001032
1076 };
1077 
1078 static const u32 bonaire_mgcg_cgcg_init[] =
1079 {
1080 	0xc420, 0xffffffff, 0xfffffffc,
1081 	0x30800, 0xffffffff, 0xe0000000,
1082 	0x3c2a0, 0xffffffff, 0x00000100,
1083 	0x3c208, 0xffffffff, 0x00000100,
1084 	0x3c2c0, 0xffffffff, 0xc0000100,
1085 	0x3c2c8, 0xffffffff, 0xc0000100,
1086 	0x3c2c4, 0xffffffff, 0xc0000100,
1087 	0x55e4, 0xffffffff, 0x00600100,
1088 	0x3c280, 0xffffffff, 0x00000100,
1089 	0x3c214, 0xffffffff, 0x06000100,
1090 	0x3c220, 0xffffffff, 0x00000100,
1091 	0x3c218, 0xffffffff, 0x06000100,
1092 	0x3c204, 0xffffffff, 0x00000100,
1093 	0x3c2e0, 0xffffffff, 0x00000100,
1094 	0x3c224, 0xffffffff, 0x00000100,
1095 	0x3c200, 0xffffffff, 0x00000100,
1096 	0x3c230, 0xffffffff, 0x00000100,
1097 	0x3c234, 0xffffffff, 0x00000100,
1098 	0x3c250, 0xffffffff, 0x00000100,
1099 	0x3c254, 0xffffffff, 0x00000100,
1100 	0x3c258, 0xffffffff, 0x00000100,
1101 	0x3c25c, 0xffffffff, 0x00000100,
1102 	0x3c260, 0xffffffff, 0x00000100,
1103 	0x3c27c, 0xffffffff, 0x00000100,
1104 	0x3c278, 0xffffffff, 0x00000100,
1105 	0x3c210, 0xffffffff, 0x06000100,
1106 	0x3c290, 0xffffffff, 0x00000100,
1107 	0x3c274, 0xffffffff, 0x00000100,
1108 	0x3c2b4, 0xffffffff, 0x00000100,
1109 	0x3c2b0, 0xffffffff, 0x00000100,
1110 	0x3c270, 0xffffffff, 0x00000100,
1111 	0x30800, 0xffffffff, 0xe0000000,
1112 	0x3c020, 0xffffffff, 0x00010000,
1113 	0x3c024, 0xffffffff, 0x00030002,
1114 	0x3c028, 0xffffffff, 0x00040007,
1115 	0x3c02c, 0xffffffff, 0x00060005,
1116 	0x3c030, 0xffffffff, 0x00090008,
1117 	0x3c034, 0xffffffff, 0x00010000,
1118 	0x3c038, 0xffffffff, 0x00030002,
1119 	0x3c03c, 0xffffffff, 0x00040007,
1120 	0x3c040, 0xffffffff, 0x00060005,
1121 	0x3c044, 0xffffffff, 0x00090008,
1122 	0x3c048, 0xffffffff, 0x00010000,
1123 	0x3c04c, 0xffffffff, 0x00030002,
1124 	0x3c050, 0xffffffff, 0x00040007,
1125 	0x3c054, 0xffffffff, 0x00060005,
1126 	0x3c058, 0xffffffff, 0x00090008,
1127 	0x3c05c, 0xffffffff, 0x00010000,
1128 	0x3c060, 0xffffffff, 0x00030002,
1129 	0x3c064, 0xffffffff, 0x00040007,
1130 	0x3c068, 0xffffffff, 0x00060005,
1131 	0x3c06c, 0xffffffff, 0x00090008,
1132 	0x3c070, 0xffffffff, 0x00010000,
1133 	0x3c074, 0xffffffff, 0x00030002,
1134 	0x3c078, 0xffffffff, 0x00040007,
1135 	0x3c07c, 0xffffffff, 0x00060005,
1136 	0x3c080, 0xffffffff, 0x00090008,
1137 	0x3c084, 0xffffffff, 0x00010000,
1138 	0x3c088, 0xffffffff, 0x00030002,
1139 	0x3c08c, 0xffffffff, 0x00040007,
1140 	0x3c090, 0xffffffff, 0x00060005,
1141 	0x3c094, 0xffffffff, 0x00090008,
1142 	0x3c098, 0xffffffff, 0x00010000,
1143 	0x3c09c, 0xffffffff, 0x00030002,
1144 	0x3c0a0, 0xffffffff, 0x00040007,
1145 	0x3c0a4, 0xffffffff, 0x00060005,
1146 	0x3c0a8, 0xffffffff, 0x00090008,
1147 	0x3c000, 0xffffffff, 0x96e00200,
1148 	0x8708, 0xffffffff, 0x00900100,
1149 	0xc424, 0xffffffff, 0x0020003f,
1150 	0x38, 0xffffffff, 0x0140001c,
1151 	0x3c, 0x000f0000, 0x000f0000,
1152 	0x220, 0xffffffff, 0xC060000C,
1153 	0x224, 0xc0000fff, 0x00000100,
1154 	0xf90, 0xffffffff, 0x00000100,
1155 	0xf98, 0x00000101, 0x00000000,
1156 	0x20a8, 0xffffffff, 0x00000104,
1157 	0x55e4, 0xff000fff, 0x00000100,
1158 	0x30cc, 0xc0000fff, 0x00000104,
1159 	0xc1e4, 0x00000001, 0x00000001,
1160 	0xd00c, 0xff000ff0, 0x00000100,
1161 	0xd80c, 0xff000ff0, 0x00000100
1162 };
1163 
1164 static const u32 spectre_golden_spm_registers[] =
1165 {
1166 	0x30800, 0xe0ffffff, 0xe0000000
1167 };
1168 
1169 static const u32 spectre_golden_common_registers[] =
1170 {
1171 	0xc770, 0xffffffff, 0x00000800,
1172 	0xc774, 0xffffffff, 0x00000800,
1173 	0xc798, 0xffffffff, 0x00007fbf,
1174 	0xc79c, 0xffffffff, 0x00007faf
1175 };
1176 
1177 static const u32 spectre_golden_registers[] =
1178 {
1179 	0x3c000, 0xffff1fff, 0x96940200,
1180 	0x3c00c, 0xffff0001, 0xff000000,
1181 	0x3c200, 0xfffc0fff, 0x00000100,
1182 	0x6ed8, 0x00010101, 0x00010000,
1183 	0x9834, 0xf00fffff, 0x00000400,
1184 	0x9838, 0xfffffffc, 0x00020200,
1185 	0x5bb0, 0x000000f0, 0x00000070,
1186 	0x5bc0, 0xf0311fff, 0x80300000,
1187 	0x98f8, 0x73773777, 0x12010001,
1188 	0x9b7c, 0x00ff0000, 0x00fc0000,
1189 	0x2f48, 0x73773777, 0x12010001,
1190 	0x8a14, 0xf000003f, 0x00000007,
1191 	0x8b24, 0xffffffff, 0x00ffffff,
1192 	0x28350, 0x3f3f3fff, 0x00000082,
1193 	0x28354, 0x0000003f, 0x00000000,
1194 	0x3e78, 0x00000001, 0x00000002,
1195 	0x913c, 0xffff03df, 0x00000004,
1196 	0xc768, 0x00000008, 0x00000008,
1197 	0x8c00, 0x000008ff, 0x00000800,
1198 	0x9508, 0x00010000, 0x00010000,
1199 	0xac0c, 0xffffffff, 0x54763210,
1200 	0x214f8, 0x01ff01ff, 0x00000002,
1201 	0x21498, 0x007ff800, 0x00200000,
1202 	0x2015c, 0xffffffff, 0x00000f40,
1203 	0x30934, 0xffffffff, 0x00000001
1204 };
1205 
1206 static const u32 spectre_mgcg_cgcg_init[] =
1207 {
1208 	0xc420, 0xffffffff, 0xfffffffc,
1209 	0x30800, 0xffffffff, 0xe0000000,
1210 	0x3c2a0, 0xffffffff, 0x00000100,
1211 	0x3c208, 0xffffffff, 0x00000100,
1212 	0x3c2c0, 0xffffffff, 0x00000100,
1213 	0x3c2c8, 0xffffffff, 0x00000100,
1214 	0x3c2c4, 0xffffffff, 0x00000100,
1215 	0x55e4, 0xffffffff, 0x00600100,
1216 	0x3c280, 0xffffffff, 0x00000100,
1217 	0x3c214, 0xffffffff, 0x06000100,
1218 	0x3c220, 0xffffffff, 0x00000100,
1219 	0x3c218, 0xffffffff, 0x06000100,
1220 	0x3c204, 0xffffffff, 0x00000100,
1221 	0x3c2e0, 0xffffffff, 0x00000100,
1222 	0x3c224, 0xffffffff, 0x00000100,
1223 	0x3c200, 0xffffffff, 0x00000100,
1224 	0x3c230, 0xffffffff, 0x00000100,
1225 	0x3c234, 0xffffffff, 0x00000100,
1226 	0x3c250, 0xffffffff, 0x00000100,
1227 	0x3c254, 0xffffffff, 0x00000100,
1228 	0x3c258, 0xffffffff, 0x00000100,
1229 	0x3c25c, 0xffffffff, 0x00000100,
1230 	0x3c260, 0xffffffff, 0x00000100,
1231 	0x3c27c, 0xffffffff, 0x00000100,
1232 	0x3c278, 0xffffffff, 0x00000100,
1233 	0x3c210, 0xffffffff, 0x06000100,
1234 	0x3c290, 0xffffffff, 0x00000100,
1235 	0x3c274, 0xffffffff, 0x00000100,
1236 	0x3c2b4, 0xffffffff, 0x00000100,
1237 	0x3c2b0, 0xffffffff, 0x00000100,
1238 	0x3c270, 0xffffffff, 0x00000100,
1239 	0x30800, 0xffffffff, 0xe0000000,
1240 	0x3c020, 0xffffffff, 0x00010000,
1241 	0x3c024, 0xffffffff, 0x00030002,
1242 	0x3c028, 0xffffffff, 0x00040007,
1243 	0x3c02c, 0xffffffff, 0x00060005,
1244 	0x3c030, 0xffffffff, 0x00090008,
1245 	0x3c034, 0xffffffff, 0x00010000,
1246 	0x3c038, 0xffffffff, 0x00030002,
1247 	0x3c03c, 0xffffffff, 0x00040007,
1248 	0x3c040, 0xffffffff, 0x00060005,
1249 	0x3c044, 0xffffffff, 0x00090008,
1250 	0x3c048, 0xffffffff, 0x00010000,
1251 	0x3c04c, 0xffffffff, 0x00030002,
1252 	0x3c050, 0xffffffff, 0x00040007,
1253 	0x3c054, 0xffffffff, 0x00060005,
1254 	0x3c058, 0xffffffff, 0x00090008,
1255 	0x3c05c, 0xffffffff, 0x00010000,
1256 	0x3c060, 0xffffffff, 0x00030002,
1257 	0x3c064, 0xffffffff, 0x00040007,
1258 	0x3c068, 0xffffffff, 0x00060005,
1259 	0x3c06c, 0xffffffff, 0x00090008,
1260 	0x3c070, 0xffffffff, 0x00010000,
1261 	0x3c074, 0xffffffff, 0x00030002,
1262 	0x3c078, 0xffffffff, 0x00040007,
1263 	0x3c07c, 0xffffffff, 0x00060005,
1264 	0x3c080, 0xffffffff, 0x00090008,
1265 	0x3c084, 0xffffffff, 0x00010000,
1266 	0x3c088, 0xffffffff, 0x00030002,
1267 	0x3c08c, 0xffffffff, 0x00040007,
1268 	0x3c090, 0xffffffff, 0x00060005,
1269 	0x3c094, 0xffffffff, 0x00090008,
1270 	0x3c098, 0xffffffff, 0x00010000,
1271 	0x3c09c, 0xffffffff, 0x00030002,
1272 	0x3c0a0, 0xffffffff, 0x00040007,
1273 	0x3c0a4, 0xffffffff, 0x00060005,
1274 	0x3c0a8, 0xffffffff, 0x00090008,
1275 	0x3c0ac, 0xffffffff, 0x00010000,
1276 	0x3c0b0, 0xffffffff, 0x00030002,
1277 	0x3c0b4, 0xffffffff, 0x00040007,
1278 	0x3c0b8, 0xffffffff, 0x00060005,
1279 	0x3c0bc, 0xffffffff, 0x00090008,
1280 	0x3c000, 0xffffffff, 0x96e00200,
1281 	0x8708, 0xffffffff, 0x00900100,
1282 	0xc424, 0xffffffff, 0x0020003f,
1283 	0x38, 0xffffffff, 0x0140001c,
1284 	0x3c, 0x000f0000, 0x000f0000,
1285 	0x220, 0xffffffff, 0xC060000C,
1286 	0x224, 0xc0000fff, 0x00000100,
1287 	0xf90, 0xffffffff, 0x00000100,
1288 	0xf98, 0x00000101, 0x00000000,
1289 	0x20a8, 0xffffffff, 0x00000104,
1290 	0x55e4, 0xff000fff, 0x00000100,
1291 	0x30cc, 0xc0000fff, 0x00000104,
1292 	0xc1e4, 0x00000001, 0x00000001,
1293 	0xd00c, 0xff000ff0, 0x00000100,
1294 	0xd80c, 0xff000ff0, 0x00000100
1295 };
1296 
1297 static const u32 kalindi_golden_spm_registers[] =
1298 {
1299 	0x30800, 0xe0ffffff, 0xe0000000
1300 };
1301 
1302 static const u32 kalindi_golden_common_registers[] =
1303 {
1304 	0xc770, 0xffffffff, 0x00000800,
1305 	0xc774, 0xffffffff, 0x00000800,
1306 	0xc798, 0xffffffff, 0x00007fbf,
1307 	0xc79c, 0xffffffff, 0x00007faf
1308 };
1309 
1310 static const u32 kalindi_golden_registers[] =
1311 {
1312 	0x3c000, 0xffffdfff, 0x6e944040,
1313 	0x55e4, 0xff607fff, 0xfc000100,
1314 	0x3c220, 0xff000fff, 0x00000100,
1315 	0x3c224, 0xff000fff, 0x00000100,
1316 	0x3c200, 0xfffc0fff, 0x00000100,
1317 	0x6ed8, 0x00010101, 0x00010000,
1318 	0x9830, 0xffffffff, 0x00000000,
1319 	0x9834, 0xf00fffff, 0x00000400,
1320 	0x5bb0, 0x000000f0, 0x00000070,
1321 	0x5bc0, 0xf0311fff, 0x80300000,
1322 	0x98f8, 0x73773777, 0x12010001,
1323 	0x98fc, 0xffffffff, 0x00000010,
1324 	0x9b7c, 0x00ff0000, 0x00fc0000,
1325 	0x8030, 0x00001f0f, 0x0000100a,
1326 	0x2f48, 0x73773777, 0x12010001,
1327 	0x2408, 0x000fffff, 0x000c007f,
1328 	0x8a14, 0xf000003f, 0x00000007,
1329 	0x8b24, 0x3fff3fff, 0x00ffcfff,
1330 	0x30a04, 0x0000ff0f, 0x00000000,
1331 	0x28a4c, 0x07ffffff, 0x06000000,
1332 	0x4d8, 0x00000fff, 0x00000100,
1333 	0x3e78, 0x00000001, 0x00000002,
1334 	0xc768, 0x00000008, 0x00000008,
1335 	0x8c00, 0x000000ff, 0x00000003,
1336 	0x214f8, 0x01ff01ff, 0x00000002,
1337 	0x21498, 0x007ff800, 0x00200000,
1338 	0x2015c, 0xffffffff, 0x00000f40,
1339 	0x88c4, 0x001f3ae3, 0x00000082,
1340 	0x88d4, 0x0000001f, 0x00000010,
1341 	0x30934, 0xffffffff, 0x00000000
1342 };
1343 
1344 static const u32 kalindi_mgcg_cgcg_init[] =
1345 {
1346 	0xc420, 0xffffffff, 0xfffffffc,
1347 	0x30800, 0xffffffff, 0xe0000000,
1348 	0x3c2a0, 0xffffffff, 0x00000100,
1349 	0x3c208, 0xffffffff, 0x00000100,
1350 	0x3c2c0, 0xffffffff, 0x00000100,
1351 	0x3c2c8, 0xffffffff, 0x00000100,
1352 	0x3c2c4, 0xffffffff, 0x00000100,
1353 	0x55e4, 0xffffffff, 0x00600100,
1354 	0x3c280, 0xffffffff, 0x00000100,
1355 	0x3c214, 0xffffffff, 0x06000100,
1356 	0x3c220, 0xffffffff, 0x00000100,
1357 	0x3c218, 0xffffffff, 0x06000100,
1358 	0x3c204, 0xffffffff, 0x00000100,
1359 	0x3c2e0, 0xffffffff, 0x00000100,
1360 	0x3c224, 0xffffffff, 0x00000100,
1361 	0x3c200, 0xffffffff, 0x00000100,
1362 	0x3c230, 0xffffffff, 0x00000100,
1363 	0x3c234, 0xffffffff, 0x00000100,
1364 	0x3c250, 0xffffffff, 0x00000100,
1365 	0x3c254, 0xffffffff, 0x00000100,
1366 	0x3c258, 0xffffffff, 0x00000100,
1367 	0x3c25c, 0xffffffff, 0x00000100,
1368 	0x3c260, 0xffffffff, 0x00000100,
1369 	0x3c27c, 0xffffffff, 0x00000100,
1370 	0x3c278, 0xffffffff, 0x00000100,
1371 	0x3c210, 0xffffffff, 0x06000100,
1372 	0x3c290, 0xffffffff, 0x00000100,
1373 	0x3c274, 0xffffffff, 0x00000100,
1374 	0x3c2b4, 0xffffffff, 0x00000100,
1375 	0x3c2b0, 0xffffffff, 0x00000100,
1376 	0x3c270, 0xffffffff, 0x00000100,
1377 	0x30800, 0xffffffff, 0xe0000000,
1378 	0x3c020, 0xffffffff, 0x00010000,
1379 	0x3c024, 0xffffffff, 0x00030002,
1380 	0x3c028, 0xffffffff, 0x00040007,
1381 	0x3c02c, 0xffffffff, 0x00060005,
1382 	0x3c030, 0xffffffff, 0x00090008,
1383 	0x3c034, 0xffffffff, 0x00010000,
1384 	0x3c038, 0xffffffff, 0x00030002,
1385 	0x3c03c, 0xffffffff, 0x00040007,
1386 	0x3c040, 0xffffffff, 0x00060005,
1387 	0x3c044, 0xffffffff, 0x00090008,
1388 	0x3c000, 0xffffffff, 0x96e00200,
1389 	0x8708, 0xffffffff, 0x00900100,
1390 	0xc424, 0xffffffff, 0x0020003f,
1391 	0x38, 0xffffffff, 0x0140001c,
1392 	0x3c, 0x000f0000, 0x000f0000,
1393 	0x220, 0xffffffff, 0xC060000C,
1394 	0x224, 0xc0000fff, 0x00000100,
1395 	0x20a8, 0xffffffff, 0x00000104,
1396 	0x55e4, 0xff000fff, 0x00000100,
1397 	0x30cc, 0xc0000fff, 0x00000104,
1398 	0xc1e4, 0x00000001, 0x00000001,
1399 	0xd00c, 0xff000ff0, 0x00000100,
1400 	0xd80c, 0xff000ff0, 0x00000100
1401 };
1402 
1403 static const u32 hawaii_golden_spm_registers[] =
1404 {
1405 	0x30800, 0xe0ffffff, 0xe0000000
1406 };
1407 
1408 static const u32 hawaii_golden_common_registers[] =
1409 {
1410 	0x30800, 0xffffffff, 0xe0000000,
1411 	0x28350, 0xffffffff, 0x3a00161a,
1412 	0x28354, 0xffffffff, 0x0000002e,
1413 	0x9a10, 0xffffffff, 0x00018208,
1414 	0x98f8, 0xffffffff, 0x12011003
1415 };
1416 
1417 static const u32 hawaii_golden_registers[] =
1418 {
1419 	0x3354, 0x00000333, 0x00000333,
1420 	0x9a10, 0x00010000, 0x00058208,
1421 	0x9830, 0xffffffff, 0x00000000,
1422 	0x9834, 0xf00fffff, 0x00000400,
1423 	0x9838, 0x0002021c, 0x00020200,
1424 	0xc78, 0x00000080, 0x00000000,
1425 	0x5bb0, 0x000000f0, 0x00000070,
1426 	0x5bc0, 0xf0311fff, 0x80300000,
1427 	0x350c, 0x00810000, 0x408af000,
1428 	0x7030, 0x31000111, 0x00000011,
1429 	0x2f48, 0x73773777, 0x12010001,
1430 	0x2120, 0x0000007f, 0x0000001b,
1431 	0x21dc, 0x00007fb6, 0x00002191,
1432 	0x3628, 0x0000003f, 0x0000000a,
1433 	0x362c, 0x0000003f, 0x0000000a,
1434 	0x2ae4, 0x00073ffe, 0x000022a2,
1435 	0x240c, 0x000007ff, 0x00000000,
1436 	0x8bf0, 0x00002001, 0x00000001,
1437 	0x8b24, 0xffffffff, 0x00ffffff,
1438 	0x30a04, 0x0000ff0f, 0x00000000,
1439 	0x28a4c, 0x07ffffff, 0x06000000,
1440 	0x3e78, 0x00000001, 0x00000002,
1441 	0xc768, 0x00000008, 0x00000008,
1442 	0xc770, 0x00000f00, 0x00000800,
1443 	0xc774, 0x00000f00, 0x00000800,
1444 	0xc798, 0x00ffffff, 0x00ff7fbf,
1445 	0xc79c, 0x00ffffff, 0x00ff7faf,
1446 	0x8c00, 0x000000ff, 0x00000800,
1447 	0xe40, 0x00001fff, 0x00001fff,
1448 	0x9060, 0x0000007f, 0x00000020,
1449 	0x9508, 0x00010000, 0x00010000,
1450 	0xae00, 0x00100000, 0x000ff07c,
1451 	0xac14, 0x000003ff, 0x0000000f,
1452 	0xac10, 0xffffffff, 0x7564fdec,
1453 	0xac0c, 0xffffffff, 0x3120b9a8,
1454 	0xac08, 0x20000000, 0x0f9c0000
1455 };
1456 
1457 static const u32 hawaii_mgcg_cgcg_init[] =
1458 {
1459 	0xc420, 0xffffffff, 0xfffffffd,
1460 	0x30800, 0xffffffff, 0xe0000000,
1461 	0x3c2a0, 0xffffffff, 0x00000100,
1462 	0x3c208, 0xffffffff, 0x00000100,
1463 	0x3c2c0, 0xffffffff, 0x00000100,
1464 	0x3c2c8, 0xffffffff, 0x00000100,
1465 	0x3c2c4, 0xffffffff, 0x00000100,
1466 	0x55e4, 0xffffffff, 0x00200100,
1467 	0x3c280, 0xffffffff, 0x00000100,
1468 	0x3c214, 0xffffffff, 0x06000100,
1469 	0x3c220, 0xffffffff, 0x00000100,
1470 	0x3c218, 0xffffffff, 0x06000100,
1471 	0x3c204, 0xffffffff, 0x00000100,
1472 	0x3c2e0, 0xffffffff, 0x00000100,
1473 	0x3c224, 0xffffffff, 0x00000100,
1474 	0x3c200, 0xffffffff, 0x00000100,
1475 	0x3c230, 0xffffffff, 0x00000100,
1476 	0x3c234, 0xffffffff, 0x00000100,
1477 	0x3c250, 0xffffffff, 0x00000100,
1478 	0x3c254, 0xffffffff, 0x00000100,
1479 	0x3c258, 0xffffffff, 0x00000100,
1480 	0x3c25c, 0xffffffff, 0x00000100,
1481 	0x3c260, 0xffffffff, 0x00000100,
1482 	0x3c27c, 0xffffffff, 0x00000100,
1483 	0x3c278, 0xffffffff, 0x00000100,
1484 	0x3c210, 0xffffffff, 0x06000100,
1485 	0x3c290, 0xffffffff, 0x00000100,
1486 	0x3c274, 0xffffffff, 0x00000100,
1487 	0x3c2b4, 0xffffffff, 0x00000100,
1488 	0x3c2b0, 0xffffffff, 0x00000100,
1489 	0x3c270, 0xffffffff, 0x00000100,
1490 	0x30800, 0xffffffff, 0xe0000000,
1491 	0x3c020, 0xffffffff, 0x00010000,
1492 	0x3c024, 0xffffffff, 0x00030002,
1493 	0x3c028, 0xffffffff, 0x00040007,
1494 	0x3c02c, 0xffffffff, 0x00060005,
1495 	0x3c030, 0xffffffff, 0x00090008,
1496 	0x3c034, 0xffffffff, 0x00010000,
1497 	0x3c038, 0xffffffff, 0x00030002,
1498 	0x3c03c, 0xffffffff, 0x00040007,
1499 	0x3c040, 0xffffffff, 0x00060005,
1500 	0x3c044, 0xffffffff, 0x00090008,
1501 	0x3c048, 0xffffffff, 0x00010000,
1502 	0x3c04c, 0xffffffff, 0x00030002,
1503 	0x3c050, 0xffffffff, 0x00040007,
1504 	0x3c054, 0xffffffff, 0x00060005,
1505 	0x3c058, 0xffffffff, 0x00090008,
1506 	0x3c05c, 0xffffffff, 0x00010000,
1507 	0x3c060, 0xffffffff, 0x00030002,
1508 	0x3c064, 0xffffffff, 0x00040007,
1509 	0x3c068, 0xffffffff, 0x00060005,
1510 	0x3c06c, 0xffffffff, 0x00090008,
1511 	0x3c070, 0xffffffff, 0x00010000,
1512 	0x3c074, 0xffffffff, 0x00030002,
1513 	0x3c078, 0xffffffff, 0x00040007,
1514 	0x3c07c, 0xffffffff, 0x00060005,
1515 	0x3c080, 0xffffffff, 0x00090008,
1516 	0x3c084, 0xffffffff, 0x00010000,
1517 	0x3c088, 0xffffffff, 0x00030002,
1518 	0x3c08c, 0xffffffff, 0x00040007,
1519 	0x3c090, 0xffffffff, 0x00060005,
1520 	0x3c094, 0xffffffff, 0x00090008,
1521 	0x3c098, 0xffffffff, 0x00010000,
1522 	0x3c09c, 0xffffffff, 0x00030002,
1523 	0x3c0a0, 0xffffffff, 0x00040007,
1524 	0x3c0a4, 0xffffffff, 0x00060005,
1525 	0x3c0a8, 0xffffffff, 0x00090008,
1526 	0x3c0ac, 0xffffffff, 0x00010000,
1527 	0x3c0b0, 0xffffffff, 0x00030002,
1528 	0x3c0b4, 0xffffffff, 0x00040007,
1529 	0x3c0b8, 0xffffffff, 0x00060005,
1530 	0x3c0bc, 0xffffffff, 0x00090008,
1531 	0x3c0c0, 0xffffffff, 0x00010000,
1532 	0x3c0c4, 0xffffffff, 0x00030002,
1533 	0x3c0c8, 0xffffffff, 0x00040007,
1534 	0x3c0cc, 0xffffffff, 0x00060005,
1535 	0x3c0d0, 0xffffffff, 0x00090008,
1536 	0x3c0d4, 0xffffffff, 0x00010000,
1537 	0x3c0d8, 0xffffffff, 0x00030002,
1538 	0x3c0dc, 0xffffffff, 0x00040007,
1539 	0x3c0e0, 0xffffffff, 0x00060005,
1540 	0x3c0e4, 0xffffffff, 0x00090008,
1541 	0x3c0e8, 0xffffffff, 0x00010000,
1542 	0x3c0ec, 0xffffffff, 0x00030002,
1543 	0x3c0f0, 0xffffffff, 0x00040007,
1544 	0x3c0f4, 0xffffffff, 0x00060005,
1545 	0x3c0f8, 0xffffffff, 0x00090008,
1546 	0xc318, 0xffffffff, 0x00020200,
1547 	0x3350, 0xffffffff, 0x00000200,
1548 	0x15c0, 0xffffffff, 0x00000400,
1549 	0x55e8, 0xffffffff, 0x00000000,
1550 	0x2f50, 0xffffffff, 0x00000902,
1551 	0x3c000, 0xffffffff, 0x96940200,
1552 	0x8708, 0xffffffff, 0x00900100,
1553 	0xc424, 0xffffffff, 0x0020003f,
1554 	0x38, 0xffffffff, 0x0140001c,
1555 	0x3c, 0x000f0000, 0x000f0000,
1556 	0x220, 0xffffffff, 0xc060000c,
1557 	0x224, 0xc0000fff, 0x00000100,
1558 	0xf90, 0xffffffff, 0x00000100,
1559 	0xf98, 0x00000101, 0x00000000,
1560 	0x20a8, 0xffffffff, 0x00000104,
1561 	0x55e4, 0xff000fff, 0x00000100,
1562 	0x30cc, 0xc0000fff, 0x00000104,
1563 	0xc1e4, 0x00000001, 0x00000001,
1564 	0xd00c, 0xff000ff0, 0x00000100,
1565 	0xd80c, 0xff000ff0, 0x00000100
1566 };
1567 
1568 static const u32 godavari_golden_registers[] =
1569 {
1570 	0x55e4, 0xff607fff, 0xfc000100,
1571 	0x6ed8, 0x00010101, 0x00010000,
1572 	0x9830, 0xffffffff, 0x00000000,
1573 	0x98302, 0xf00fffff, 0x00000400,
1574 	0x6130, 0xffffffff, 0x00010000,
1575 	0x5bb0, 0x000000f0, 0x00000070,
1576 	0x5bc0, 0xf0311fff, 0x80300000,
1577 	0x98f8, 0x73773777, 0x12010001,
1578 	0x98fc, 0xffffffff, 0x00000010,
1579 	0x8030, 0x00001f0f, 0x0000100a,
1580 	0x2f48, 0x73773777, 0x12010001,
1581 	0x2408, 0x000fffff, 0x000c007f,
1582 	0x8a14, 0xf000003f, 0x00000007,
1583 	0x8b24, 0xffffffff, 0x00ff0fff,
1584 	0x30a04, 0x0000ff0f, 0x00000000,
1585 	0x28a4c, 0x07ffffff, 0x06000000,
1586 	0x4d8, 0x00000fff, 0x00000100,
1587 	0xd014, 0x00010000, 0x00810001,
1588 	0xd814, 0x00010000, 0x00810001,
1589 	0x3e78, 0x00000001, 0x00000002,
1590 	0xc768, 0x00000008, 0x00000008,
1591 	0xc770, 0x00000f00, 0x00000800,
1592 	0xc774, 0x00000f00, 0x00000800,
1593 	0xc798, 0x00ffffff, 0x00ff7fbf,
1594 	0xc79c, 0x00ffffff, 0x00ff7faf,
1595 	0x8c00, 0x000000ff, 0x00000001,
1596 	0x214f8, 0x01ff01ff, 0x00000002,
1597 	0x21498, 0x007ff800, 0x00200000,
1598 	0x2015c, 0xffffffff, 0x00000f40,
1599 	0x88c4, 0x001f3ae3, 0x00000082,
1600 	0x88d4, 0x0000001f, 0x00000010,
1601 	0x30934, 0xffffffff, 0x00000000
1602 };
1603 
1604 
1605 static void cik_init_golden_registers(struct radeon_device *rdev)
1606 {
1607 	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
1608 	mutex_lock(&rdev->grbm_idx_mutex);
1609 	switch (rdev->family) {
1610 	case CHIP_BONAIRE:
1611 		radeon_program_register_sequence(rdev,
1612 						 bonaire_mgcg_cgcg_init,
1613 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1614 		radeon_program_register_sequence(rdev,
1615 						 bonaire_golden_registers,
1616 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1617 		radeon_program_register_sequence(rdev,
1618 						 bonaire_golden_common_registers,
1619 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1620 		radeon_program_register_sequence(rdev,
1621 						 bonaire_golden_spm_registers,
1622 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1623 		break;
1624 	case CHIP_KABINI:
1625 		radeon_program_register_sequence(rdev,
1626 						 kalindi_mgcg_cgcg_init,
1627 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1628 		radeon_program_register_sequence(rdev,
1629 						 kalindi_golden_registers,
1630 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1631 		radeon_program_register_sequence(rdev,
1632 						 kalindi_golden_common_registers,
1633 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1634 		radeon_program_register_sequence(rdev,
1635 						 kalindi_golden_spm_registers,
1636 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1637 		break;
1638 	case CHIP_MULLINS:
1639 		radeon_program_register_sequence(rdev,
1640 						 kalindi_mgcg_cgcg_init,
1641 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1642 		radeon_program_register_sequence(rdev,
1643 						 godavari_golden_registers,
1644 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1645 		radeon_program_register_sequence(rdev,
1646 						 kalindi_golden_common_registers,
1647 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1648 		radeon_program_register_sequence(rdev,
1649 						 kalindi_golden_spm_registers,
1650 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1651 		break;
1652 	case CHIP_KAVERI:
1653 		radeon_program_register_sequence(rdev,
1654 						 spectre_mgcg_cgcg_init,
1655 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1656 		radeon_program_register_sequence(rdev,
1657 						 spectre_golden_registers,
1658 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1659 		radeon_program_register_sequence(rdev,
1660 						 spectre_golden_common_registers,
1661 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1662 		radeon_program_register_sequence(rdev,
1663 						 spectre_golden_spm_registers,
1664 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1665 		break;
1666 	case CHIP_HAWAII:
1667 		radeon_program_register_sequence(rdev,
1668 						 hawaii_mgcg_cgcg_init,
1669 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1670 		radeon_program_register_sequence(rdev,
1671 						 hawaii_golden_registers,
1672 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1673 		radeon_program_register_sequence(rdev,
1674 						 hawaii_golden_common_registers,
1675 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1676 		radeon_program_register_sequence(rdev,
1677 						 hawaii_golden_spm_registers,
1678 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1679 		break;
1680 	default:
1681 		break;
1682 	}
1683 	mutex_unlock(&rdev->grbm_idx_mutex);
1684 }
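/*
 * The golden-register tables above are {offset, and_mask, or_value}
 * triplets.  A simplified sketch of how radeon_program_register_sequence()
 * (defined in radeon_device.c) applies them -- for reference only:
 */
#if 0
	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];
		if (and_mask == 0xffffffff)
			tmp = or_mask;	/* full-width mask: direct write */
		else
			tmp = (RREG32(reg) & ~and_mask) | or_mask;
		WREG32(reg, tmp);
	}
#endif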
1685 
1686 /**
1687  * cik_get_xclk - get the xclk
1688  *
1689  * @rdev: radeon_device pointer
1690  *
1691  * Returns the reference clock used by the gfx engine
1692  * (CIK).
1693  */
1694 u32 cik_get_xclk(struct radeon_device *rdev)
1695 {
1696 	u32 reference_clock = rdev->clock.spll.reference_freq;
1697 
1698 	if (rdev->flags & RADEON_IS_IGP) {
1699 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1700 			return reference_clock / 2;
1701 	} else {
1702 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1703 			return reference_clock / 4;
1704 	}
1705 	return reference_clock;
1706 }
1707 
1708 /**
1709  * cik_mm_rdoorbell - read a doorbell dword
1710  *
1711  * @rdev: radeon_device pointer
1712  * @index: doorbell index
1713  *
1714  * Returns the value in the doorbell aperture at the
1715  * requested doorbell index (CIK).
1716  */
1717 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1718 {
1719 	if (index < rdev->doorbell.num_doorbells) {
1720 		return readl(rdev->doorbell.ptr + index);
1721 	} else {
1722 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1723 		return 0;
1724 	}
1725 }
1726 
1727 /**
1728  * cik_mm_wdoorbell - write a doorbell dword
1729  *
1730  * @rdev: radeon_device pointer
1731  * @index: doorbell index
1732  * @v: value to write
1733  *
1734  * Writes @v to the doorbell aperture at the
1735  * requested doorbell index (CIK).
1736  */
1737 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1738 {
1739 	if (index < rdev->doorbell.num_doorbells) {
1740 		writel(v, rdev->doorbell.ptr + index);
1741 	} else {
1742 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1743 	}
1744 }
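/*
 * Usage sketch: ring code reaches these helpers through the
 * RDOORBELL32()/WDOORBELL32() wrappers, e.g. to kick a compute ring
 * after advancing its write pointer ("ring" is a placeholder here):
 */
#if 0
	WDOORBELL32(ring->doorbell_index, ring->wptr);
#endif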
1745 
1746 #define BONAIRE_IO_MC_REGS_SIZE 36
1747 
1748 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1749 {
1750 	{0x00000070, 0x04400000},
1751 	{0x00000071, 0x80c01803},
1752 	{0x00000072, 0x00004004},
1753 	{0x00000073, 0x00000100},
1754 	{0x00000074, 0x00ff0000},
1755 	{0x00000075, 0x34000000},
1756 	{0x00000076, 0x08000014},
1757 	{0x00000077, 0x00cc08ec},
1758 	{0x00000078, 0x00000400},
1759 	{0x00000079, 0x00000000},
1760 	{0x0000007a, 0x04090000},
1761 	{0x0000007c, 0x00000000},
1762 	{0x0000007e, 0x4408a8e8},
1763 	{0x0000007f, 0x00000304},
1764 	{0x00000080, 0x00000000},
1765 	{0x00000082, 0x00000001},
1766 	{0x00000083, 0x00000002},
1767 	{0x00000084, 0xf3e4f400},
1768 	{0x00000085, 0x052024e3},
1769 	{0x00000087, 0x00000000},
1770 	{0x00000088, 0x01000000},
1771 	{0x0000008a, 0x1c0a0000},
1772 	{0x0000008b, 0xff010000},
1773 	{0x0000008d, 0xffffefff},
1774 	{0x0000008e, 0xfff3efff},
1775 	{0x0000008f, 0xfff3efbf},
1776 	{0x00000092, 0xf7ffffff},
1777 	{0x00000093, 0xffffff7f},
1778 	{0x00000095, 0x00101101},
1779 	{0x00000096, 0x00000fff},
1780 	{0x00000097, 0x00116fff},
1781 	{0x00000098, 0x60010000},
1782 	{0x00000099, 0x10010000},
1783 	{0x0000009a, 0x00006000},
1784 	{0x0000009b, 0x00001000},
1785 	{0x0000009f, 0x00b48000}
1786 };
1787 
1788 #define HAWAII_IO_MC_REGS_SIZE 22
1789 
1790 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1791 {
1792 	{0x0000007d, 0x40000000},
1793 	{0x0000007e, 0x40180304},
1794 	{0x0000007f, 0x0000ff00},
1795 	{0x00000081, 0x00000000},
1796 	{0x00000083, 0x00000800},
1797 	{0x00000086, 0x00000000},
1798 	{0x00000087, 0x00000100},
1799 	{0x00000088, 0x00020100},
1800 	{0x00000089, 0x00000000},
1801 	{0x0000008b, 0x00040000},
1802 	{0x0000008c, 0x00000100},
1803 	{0x0000008e, 0xff010000},
1804 	{0x00000090, 0xffffefff},
1805 	{0x00000091, 0xfff3efff},
1806 	{0x00000092, 0xfff3efbf},
1807 	{0x00000093, 0xf7ffffff},
1808 	{0x00000094, 0xffffff7f},
1809 	{0x00000095, 0x00000fff},
1810 	{0x00000096, 0x00116fff},
1811 	{0x00000097, 0x60010000},
1812 	{0x00000098, 0x10010000},
1813 	{0x0000009f, 0x00c79000}
1814 };
1815 
1816 
1817 /**
1818  * cik_srbm_select - select specific register instances
1819  *
1820  * @rdev: radeon_device pointer
1821  * @me: selected ME (micro engine)
1822  * @pipe: pipe
1823  * @queue: queue
1824  * @vmid: VMID
1825  *
1826  * Switches the currently active register instances.  Some
1827  * registers are instanced per VMID, others are instanced per
1828  * me/pipe/queue combination.
1829  */
1830 static void cik_srbm_select(struct radeon_device *rdev,
1831 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1832 {
1833 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1834 			     MEID(me & 0x3) |
1835 			     VMID(vmid & 0xf) |
1836 			     QUEUEID(queue & 0x7));
1837 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1838 }
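/*
 * Illustrative sketch, not part of the driver: callers bracket accesses
 * to the instanced registers with a select/deselect pair while holding
 * srbm_mutex, so two threads never race on SRBM_GFX_CNTL:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program the CP_HQD_* registers of this queue ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */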
1839 
1840 /* ucode loading */
1841 /**
1842  * ci_mc_load_microcode - load MC ucode into the hw
1843  *
1844  * @rdev: radeon_device pointer
1845  *
1846  * Load the GDDR MC ucode into the hw (CIK).
1847  * Returns 0 on success, error on failure.
1848  */
1849 int ci_mc_load_microcode(struct radeon_device *rdev)
1850 {
1851 	const __be32 *fw_data = NULL;
1852 	const __le32 *new_fw_data = NULL;
1853 	u32 running, tmp;
1854 	u32 *io_mc_regs = NULL;
1855 	const __le32 *new_io_mc_regs = NULL;
1856 	int i, regs_size, ucode_size;
1857 
1858 	if (!rdev->mc_fw)
1859 		return -EINVAL;
1860 
1861 	if (rdev->new_fw) {
1862 		const struct mc_firmware_header_v1_0 *hdr =
1863 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1864 
1865 		radeon_ucode_print_mc_hdr(&hdr->header);
1866 
1867 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1868 		new_io_mc_regs = (const __le32 *)
1869 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1870 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1871 		new_fw_data = (const __le32 *)
1872 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1873 	} else {
1874 		ucode_size = rdev->mc_fw->datasize / 4;
1875 
1876 		switch (rdev->family) {
1877 		case CHIP_BONAIRE:
1878 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1879 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1880 			break;
1881 		case CHIP_HAWAII:
1882 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1883 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1884 			break;
1885 		default:
1886 			return -EINVAL;
1887 		}
1888 		fw_data = (const __be32 *)rdev->mc_fw->data;
1889 	}
1890 
1891 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1892 
1893 	if (running == 0) {
1894 		/* reset the engine and set to writable */
1895 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1896 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1897 
1898 		/* load mc io regs */
1899 		for (i = 0; i < regs_size; i++) {
1900 			if (rdev->new_fw) {
1901 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1902 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1903 			} else {
1904 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1905 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1906 			}
1907 		}
1908 
1909 		tmp = RREG32(MC_SEQ_MISC0);
1910 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1911 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1912 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1913 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1914 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1915 		}
1916 
1917 		/* load the MC ucode */
1918 		for (i = 0; i < ucode_size; i++) {
1919 			if (rdev->new_fw)
1920 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1921 			else
1922 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1923 		}
1924 
1925 		/* put the engine back into the active state */
1926 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1927 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1928 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1929 
1930 		/* wait for training to complete */
1931 		for (i = 0; i < rdev->usec_timeout; i++) {
1932 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1933 				break;
1934 			udelay(1);
1935 		}
1936 		for (i = 0; i < rdev->usec_timeout; i++) {
1937 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1938 				break;
1939 			udelay(1);
1940 		}
1941 	}
1942 
1943 	return 0;
1944 }
1945 
1946 /**
1947  * cik_init_microcode - load ucode images from disk
1948  *
1949  * @rdev: radeon_device pointer
1950  *
1951  * Use the firmware interface to load the ucode images into
1952  * the driver (not loaded into hw).
1953  * Returns 0 on success, error on failure.
1954  */
1955 static int cik_init_microcode(struct radeon_device *rdev)
1956 {
1957 	const char *chip_name;
1958 	const char *new_chip_name;
1959 	size_t pfp_req_size, me_req_size, ce_req_size,
1960 		mec_req_size, rlc_req_size, mc_req_size = 0,
1961 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1962 	char fw_name[30];
1963 	int new_fw = 0;
1964 	int err;
1965 	int num_fw;
1966 	bool new_smc = false;
1967 
1968 	DRM_DEBUG("\n");
1969 
1970 	switch (rdev->family) {
1971 	case CHIP_BONAIRE:
1972 		chip_name = "BONAIRE";
1973 		if ((rdev->pdev->revision == 0x80) ||
1974 		    (rdev->pdev->revision == 0x81) ||
1975 		    (rdev->pdev->device == 0x665f))
1976 			new_smc = true;
1977 		new_chip_name = "bonaire";
1978 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1979 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1980 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1981 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1982 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1983 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1984 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1985 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1986 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1987 		num_fw = 8;
1988 		break;
1989 	case CHIP_HAWAII:
1990 		chip_name = "HAWAII";
1991 		if (rdev->pdev->revision == 0x80)
1992 			new_smc = true;
1993 		new_chip_name = "hawaii";
1994 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1995 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1996 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1997 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1998 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1999 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2000 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2001 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2002 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2003 		num_fw = 8;
2004 		break;
2005 	case CHIP_KAVERI:
2006 		chip_name = "KAVERI";
2007 		new_chip_name = "kaveri";
2008 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2009 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2010 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2011 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2012 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2013 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2014 		num_fw = 7;
2015 		break;
2016 	case CHIP_KABINI:
2017 		chip_name = "KABINI";
2018 		new_chip_name = "kabini";
2019 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2020 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2021 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2022 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2023 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2024 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2025 		num_fw = 6;
2026 		break;
2027 	case CHIP_MULLINS:
2028 		chip_name = "MULLINS";
2029 		new_chip_name = "mullins";
2030 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2031 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2032 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2033 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2034 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2035 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2036 		num_fw = 6;
2037 		break;
2038 	default: BUG();
2039 	}
2040 
2041 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2042 
2043 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
2044 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2045 	if (err) {
2046 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
2047 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2048 		if (err)
2049 			goto out;
2050 		if (rdev->pfp_fw->datasize != pfp_req_size) {
2051 			printk(KERN_ERR
2052 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2053 			       rdev->pfp_fw->datasize, fw_name);
2054 			err = -EINVAL;
2055 			goto out;
2056 		}
2057 	} else {
2058 		err = radeon_ucode_validate(rdev->pfp_fw);
2059 		if (err) {
2060 			printk(KERN_ERR
2061 			       "cik_fw: validation failed for firmware \"%s\"\n",
2062 			       fw_name);
2063 			goto out;
2064 		} else {
2065 			new_fw++;
2066 		}
2067 	}
2068 
2069 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
2070 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2071 	if (err) {
2072 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
2073 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2074 		if (err)
2075 			goto out;
2076 		if (rdev->me_fw->datasize != me_req_size) {
2077 			printk(KERN_ERR
2078 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2079 			       rdev->me_fw->datasize, fw_name);
2080 			err = -EINVAL;
2081 		}
2082 	} else {
2083 		err = radeon_ucode_validate(rdev->me_fw);
2084 		if (err) {
2085 			printk(KERN_ERR
2086 			       "cik_fw: validation failed for firmware \"%s\"\n",
2087 			       fw_name);
2088 			goto out;
2089 		} else {
2090 			new_fw++;
2091 		}
2092 	}
2093 
2094 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
2095 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2096 	if (err) {
2097 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
2098 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2099 		if (err)
2100 			goto out;
2101 		if (rdev->ce_fw->datasize != ce_req_size) {
2102 			printk(KERN_ERR
2103 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2104 			       rdev->ce_fw->datasize, fw_name);
2105 			err = -EINVAL;
2106 		}
2107 	} else {
2108 		err = radeon_ucode_validate(rdev->ce_fw);
2109 		if (err) {
2110 			printk(KERN_ERR
2111 			       "cik_fw: validation failed for firmware \"%s\"\n",
2112 			       fw_name);
2113 			goto out;
2114 		} else {
2115 			new_fw++;
2116 		}
2117 	}
2118 
2119 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", new_chip_name);
2120 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2121 	if (err) {
2122 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
2123 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2124 		if (err)
2125 			goto out;
2126 		if (rdev->mec_fw->datasize != mec_req_size) {
2127 			printk(KERN_ERR
2128 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2129 			       rdev->mec_fw->datasize, fw_name);
2130 			err = -EINVAL;
2131 		}
2132 	} else {
2133 		err = radeon_ucode_validate(rdev->mec_fw);
2134 		if (err) {
2135 			printk(KERN_ERR
2136 			       "cik_fw: validation failed for firmware \"%s\"\n",
2137 			       fw_name);
2138 			goto out;
2139 		} else {
2140 			new_fw++;
2141 		}
2142 	}
2143 
2144 	if (rdev->family == CHIP_KAVERI) {
2145 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec2", new_chip_name);
2146 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2147 		if (err) {
2148 			goto out;
2149 		} else {
2150 			err = radeon_ucode_validate(rdev->mec2_fw);
2151 			if (err) {
2152 				goto out;
2153 			} else {
2154 				new_fw++;
2155 			}
2156 		}
2157 	}
2158 
2159 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
2160 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2161 	if (err) {
2162 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
2163 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2164 		if (err)
2165 			goto out;
2166 		if (rdev->rlc_fw->datasize != rlc_req_size) {
2167 			printk(KERN_ERR
2168 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2169 			       rdev->rlc_fw->datasize, fw_name);
2170 			err = -EINVAL;
2171 		}
2172 	} else {
2173 		err = radeon_ucode_validate(rdev->rlc_fw);
2174 		if (err) {
2175 			printk(KERN_ERR
2176 			       "cik_fw: validation failed for firmware \"%s\"\n",
2177 			       fw_name);
2178 			goto out;
2179 		} else {
2180 			new_fw++;
2181 		}
2182 	}
2183 
2184 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", new_chip_name);
2185 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2186 	if (err) {
2187 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
2188 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2189 		if (err)
2190 			goto out;
2191 		if (rdev->sdma_fw->datasize != sdma_req_size) {
2192 			printk(KERN_ERR
2193 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2194 			       rdev->sdma_fw->datasize, fw_name);
2195 			err = -EINVAL;
2196 		}
2197 	} else {
2198 		err = radeon_ucode_validate(rdev->sdma_fw);
2199 		if (err) {
2200 			printk(KERN_ERR
2201 			       "cik_fw: validation failed for firmware \"%s\"\n",
2202 			       fw_name);
2203 			goto out;
2204 		} else {
2205 			new_fw++;
2206 		}
2207 	}
2208 
2209 	/* No SMC, MC ucode on APUs */
2210 	if (!(rdev->flags & RADEON_IS_IGP)) {
2211 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
2212 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2213 		if (err) {
2214 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
2215 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2216 			if (err) {
2217 				ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
2218 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2219 				if (err)
2220 					goto out;
2221 			}
2222 			if ((rdev->mc_fw->datasize != mc_req_size) &&
2223 			    (rdev->mc_fw->datasize != mc2_req_size)){
2224 				printk(KERN_ERR
2225 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2226 				       rdev->mc_fw->datasize, fw_name);
2227 				err = -EINVAL;
2228 			}
2229 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
2230 		} else {
2231 			err = radeon_ucode_validate(rdev->mc_fw);
2232 			if (err) {
2233 				printk(KERN_ERR
2234 				       "cik_fw: validation failed for firmware \"%s\"\n",
2235 				       fw_name);
2236 				goto out;
2237 			} else {
2238 				new_fw++;
2239 			}
2240 		}
2241 
2242 		if (new_smc)
2243 			ksnprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_k_smc", new_chip_name);
2244 		else
2245 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
2246 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2247 		if (err) {
2248 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
2249 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2250 			if (err) {
2251 				printk(KERN_ERR
2252 				       "smc: error loading firmware \"%s\"\n",
2253 				       fw_name);
2254 				release_firmware(rdev->smc_fw);
2255 				rdev->smc_fw = NULL;
2256 				err = 0;
2257 			} else if (rdev->smc_fw->datasize != smc_req_size) {
2258 				printk(KERN_ERR
2259 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2260 				       rdev->smc_fw->datasize, fw_name);
2261 				err = -EINVAL;
2262 			}
2263 		} else {
2264 			err = radeon_ucode_validate(rdev->smc_fw);
2265 			if (err) {
2266 				printk(KERN_ERR
2267 				       "cik_fw: validation failed for firmware \"%s\"\n",
2268 				       fw_name);
2269 				goto out;
2270 			} else {
2271 				new_fw++;
2272 			}
2273 		}
2274 	}
2275 
2276 	if (new_fw == 0) {
2277 		rdev->new_fw = false;
2278 	} else if (new_fw < num_fw) {
2279 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2280 		err = -EINVAL;
2281 	} else {
2282 		rdev->new_fw = true;
2283 	}
2284 
2285 out:
2286 	if (err) {
2287 		if (err != -EINVAL)
2288 			printk(KERN_ERR
2289 			       "cik_cp: Failed to load firmware \"%s\"\n",
2290 			       fw_name);
2291 		release_firmware(rdev->pfp_fw);
2292 		rdev->pfp_fw = NULL;
2293 		release_firmware(rdev->me_fw);
2294 		rdev->me_fw = NULL;
2295 		release_firmware(rdev->ce_fw);
2296 		rdev->ce_fw = NULL;
2297 		release_firmware(rdev->mec_fw);
2298 		rdev->mec_fw = NULL;
2299 		release_firmware(rdev->mec2_fw);
2300 		rdev->mec2_fw = NULL;
2301 		release_firmware(rdev->rlc_fw);
2302 		rdev->rlc_fw = NULL;
2303 		release_firmware(rdev->sdma_fw);
2304 		rdev->sdma_fw = NULL;
2305 		release_firmware(rdev->mc_fw);
2306 		rdev->mc_fw = NULL;
2307 		release_firmware(rdev->smc_fw);
2308 		rdev->smc_fw = NULL;
2309 	}
2310 	return err;
2311 }
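/*
 * Illustrative note: the lookup order above always tries the new-layout
 * image first (e.g. "radeonkmsfw_bonaire_pfp") and falls back to the
 * legacy one ("radeonkmsfw_BONAIRE_pfp").  Legacy images are only
 * size-checked, new-layout images are validated by header, and mixing
 * the two layouts is rejected via the new_fw/num_fw count.
 */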
2312 
2313 /**
2314  * cik_fini_microcode - drop the firmware image references
2315  *
2316  * @rdev: radeon_device pointer
2317  *
2318  * Drop the pfp, me, mec, mec2, rlc, sdma, mc, smc and ce firmware image references.
2319  * Called at driver shutdown.
2320  */
2321 static void cik_fini_microcode(struct radeon_device *rdev)
2322 {
2323 	release_firmware(rdev->pfp_fw);
2324 	rdev->pfp_fw = NULL;
2325 	release_firmware(rdev->me_fw);
2326 	rdev->me_fw = NULL;
2327 	release_firmware(rdev->ce_fw);
2328 	rdev->ce_fw = NULL;
2329 	release_firmware(rdev->mec_fw);
2330 	rdev->mec_fw = NULL;
2331 	release_firmware(rdev->mec2_fw);
2332 	rdev->mec2_fw = NULL;
2333 	release_firmware(rdev->rlc_fw);
2334 	rdev->rlc_fw = NULL;
2335 	release_firmware(rdev->sdma_fw);
2336 	rdev->sdma_fw = NULL;
2337 	release_firmware(rdev->mc_fw);
2338 	rdev->mc_fw = NULL;
2339 	release_firmware(rdev->smc_fw);
2340 	rdev->smc_fw = NULL;
2341 }
2342 
2343 /*
2344  * Core functions
2345  */
2346 /**
2347  * cik_tiling_mode_table_init - init the hw tiling table
2348  *
2349  * @rdev: radeon_device pointer
2350  *
2351  * Starting with SI, the tiling setup is done globally in a
2352  * set of 32 tiling modes.  Rather than selecting each set of
2353  * parameters per surface as on older asics, we just select
2354  * which index in the tiling table we want to use, and the
2355  * surface uses those parameters (CIK).
2356  */
2357 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2358 {
2359 	u32 *tile = rdev->config.cik.tile_mode_array;
2360 	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2361 	const u32 num_tile_mode_states =
2362 			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2363 	const u32 num_secondary_tile_mode_states =
2364 			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2365 	u32 reg_offset, split_equal_to_row_size;
2366 	u32 num_pipe_configs;
2367 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2368 		rdev->config.cik.max_shader_engines;
2369 
2370 	switch (rdev->config.cik.mem_row_size_in_kb) {
2371 	case 1:
2372 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2373 		break;
2374 	case 2:
2375 	default:
2376 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2377 		break;
2378 	case 4:
2379 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2380 		break;
2381 	}
2382 
2383 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2384 	if (num_pipe_configs > 8)
2385 		num_pipe_configs = 16;
2386 
2387 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2388 		tile[reg_offset] = 0;
2389 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2390 		macrotile[reg_offset] = 0;
2391 
2392 	switch(num_pipe_configs) {
2393 	case 16:
2394 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2395 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2396 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2398 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2399 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2400 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2402 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2404 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2406 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2408 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2410 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2411 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2412 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413 			   TILE_SPLIT(split_equal_to_row_size));
2414 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2415 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2417 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2418 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2419 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2421 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2422 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2423 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424 			   TILE_SPLIT(split_equal_to_row_size));
2425 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2426 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2427 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2428 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2430 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2435 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2436 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2437 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2439 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2440 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2441 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2443 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2444 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2445 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2446 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2447 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2449 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2450 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2451 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2452 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2453 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2454 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2455 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2458 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2459 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2460 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2461 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2462 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2463 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2464 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2465 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2466 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2467 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2468 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2469 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2470 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2471 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472 
2473 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2475 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476 			   NUM_BANKS(ADDR_SURF_16_BANK));
2477 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480 			   NUM_BANKS(ADDR_SURF_16_BANK));
2481 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484 			   NUM_BANKS(ADDR_SURF_16_BANK));
2485 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488 			   NUM_BANKS(ADDR_SURF_16_BANK));
2489 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492 			   NUM_BANKS(ADDR_SURF_8_BANK));
2493 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496 			   NUM_BANKS(ADDR_SURF_4_BANK));
2497 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500 			   NUM_BANKS(ADDR_SURF_2_BANK));
2501 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2503 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2504 			   NUM_BANKS(ADDR_SURF_16_BANK));
2505 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2507 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2508 			   NUM_BANKS(ADDR_SURF_16_BANK));
2509 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2511 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2512 			    NUM_BANKS(ADDR_SURF_16_BANK));
2513 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2514 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2515 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2516 			    NUM_BANKS(ADDR_SURF_8_BANK));
2517 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2519 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2520 			    NUM_BANKS(ADDR_SURF_4_BANK));
2521 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524 			    NUM_BANKS(ADDR_SURF_2_BANK));
2525 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2527 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2528 			    NUM_BANKS(ADDR_SURF_2_BANK));
2529 
2530 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2531 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2532 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2533 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2534 		break;
2535 
2536 	case 8:
2537 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2538 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2539 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2541 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2543 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2545 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2547 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2549 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2551 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2553 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2555 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556 			   TILE_SPLIT(split_equal_to_row_size));
2557 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2560 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2561 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2562 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2564 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2565 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2566 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567 			   TILE_SPLIT(split_equal_to_row_size));
2568 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2569 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2570 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2571 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2572 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2573 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2574 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2575 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2577 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2578 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2579 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2580 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2581 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2582 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2583 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2585 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2586 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2588 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2590 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2592 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2593 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2594 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2595 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2596 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2597 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2598 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2600 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2601 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2602 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2603 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2605 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2607 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2608 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2609 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2610 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2611 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2612 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2613 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2615 
2616 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2618 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2619 				NUM_BANKS(ADDR_SURF_16_BANK));
2620 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2622 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2623 				NUM_BANKS(ADDR_SURF_16_BANK));
2624 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2626 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2627 				NUM_BANKS(ADDR_SURF_16_BANK));
2628 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2630 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631 				NUM_BANKS(ADDR_SURF_16_BANK));
2632 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2635 				NUM_BANKS(ADDR_SURF_8_BANK));
2636 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639 				NUM_BANKS(ADDR_SURF_4_BANK));
2640 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643 				NUM_BANKS(ADDR_SURF_2_BANK));
2644 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2646 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2647 				NUM_BANKS(ADDR_SURF_16_BANK));
2648 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2649 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2650 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2651 				NUM_BANKS(ADDR_SURF_16_BANK));
2652 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2653 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2654 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2655 				NUM_BANKS(ADDR_SURF_16_BANK));
2656 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2658 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2659 				NUM_BANKS(ADDR_SURF_16_BANK));
2660 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2662 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2663 				NUM_BANKS(ADDR_SURF_8_BANK));
2664 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2666 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2667 				NUM_BANKS(ADDR_SURF_4_BANK));
2668 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2670 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2671 				NUM_BANKS(ADDR_SURF_2_BANK));
2672 
2673 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2674 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2675 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2676 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2677 		break;
2678 
2679 	case 4:
2680 		if (num_rbs == 4) {
2681 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2683 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2684 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2685 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2687 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2688 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2689 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2691 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2693 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2695 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2697 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2698 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2699 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700 			   TILE_SPLIT(split_equal_to_row_size));
2701 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2702 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2704 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2705 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2706 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2708 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2709 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2710 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711 			   TILE_SPLIT(split_equal_to_row_size));
2712 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2713 			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2714 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2715 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2717 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2718 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2719 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2721 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2722 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2723 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2724 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2725 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2726 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2727 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2729 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2730 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2731 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2732 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2736 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2737 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2738 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2740 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2741 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2742 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2743 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2744 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2745 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2747 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2748 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2749 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2750 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2751 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2752 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2753 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2755 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2756 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2757 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2759 
2760 		} else if (num_rbs < 4) {
2761 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2763 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2764 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2765 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2767 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2769 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2771 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2773 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2775 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2777 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2778 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2779 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780 			   TILE_SPLIT(split_equal_to_row_size));
2781 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2783 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2784 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2785 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2786 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2788 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2789 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2790 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791 			   TILE_SPLIT(split_equal_to_row_size));
2792 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2793 			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2794 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2795 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2796 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2797 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2799 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2800 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2801 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2802 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2803 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2804 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2805 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2806 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2807 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2808 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2809 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2810 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2811 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2812 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2813 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2814 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2815 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2816 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2817 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2818 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2819 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2820 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2821 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2822 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2823 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2824 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2825 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2826 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2827 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2828 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2829 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2830 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2831 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2832 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2833 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2834 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2835 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2836 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2837 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2838 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2839 		}
2840 
2841 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2843 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2844 				NUM_BANKS(ADDR_SURF_16_BANK));
2845 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2846 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2847 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848 				NUM_BANKS(ADDR_SURF_16_BANK));
2849 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2852 				NUM_BANKS(ADDR_SURF_16_BANK));
2853 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2855 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2856 				NUM_BANKS(ADDR_SURF_16_BANK));
2857 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2860 				NUM_BANKS(ADDR_SURF_16_BANK));
2861 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864 				NUM_BANKS(ADDR_SURF_8_BANK));
2865 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2868 				NUM_BANKS(ADDR_SURF_4_BANK));
2869 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2870 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2871 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2872 				NUM_BANKS(ADDR_SURF_16_BANK));
2873 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2874 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2875 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2876 				NUM_BANKS(ADDR_SURF_16_BANK));
2877 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2878 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2879 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2880 				NUM_BANKS(ADDR_SURF_16_BANK));
2881 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2882 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2883 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2884 				NUM_BANKS(ADDR_SURF_16_BANK));
2885 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2886 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2887 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2888 				NUM_BANKS(ADDR_SURF_16_BANK));
2889 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2890 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2891 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2892 				NUM_BANKS(ADDR_SURF_8_BANK));
2893 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2894 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2895 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2896 				NUM_BANKS(ADDR_SURF_4_BANK));
2897 
2898 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2899 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2900 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2901 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2902 		break;
2903 
2904 	case 2:
2905 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2907 			   PIPE_CONFIG(ADDR_SURF_P2) |
2908 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2909 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2911 			   PIPE_CONFIG(ADDR_SURF_P2) |
2912 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2913 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2915 			   PIPE_CONFIG(ADDR_SURF_P2) |
2916 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2917 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2919 			   PIPE_CONFIG(ADDR_SURF_P2) |
2920 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2921 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2922 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2923 			   PIPE_CONFIG(ADDR_SURF_P2) |
2924 			   TILE_SPLIT(split_equal_to_row_size));
2925 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926 			   PIPE_CONFIG(ADDR_SURF_P2) |
2927 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2928 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2929 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2930 			   PIPE_CONFIG(ADDR_SURF_P2) |
2931 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2932 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2933 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2934 			   PIPE_CONFIG(ADDR_SURF_P2) |
2935 			   TILE_SPLIT(split_equal_to_row_size));
2936 		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2937 			   PIPE_CONFIG(ADDR_SURF_P2);
2938 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2939 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2940 			   PIPE_CONFIG(ADDR_SURF_P2));
2941 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2943 			    PIPE_CONFIG(ADDR_SURF_P2) |
2944 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2946 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2947 			    PIPE_CONFIG(ADDR_SURF_P2) |
2948 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2950 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2951 			    PIPE_CONFIG(ADDR_SURF_P2) |
2952 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2953 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2954 			    PIPE_CONFIG(ADDR_SURF_P2) |
2955 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2956 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2957 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2958 			    PIPE_CONFIG(ADDR_SURF_P2) |
2959 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2960 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2961 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2962 			    PIPE_CONFIG(ADDR_SURF_P2) |
2963 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2964 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2965 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2966 			    PIPE_CONFIG(ADDR_SURF_P2) |
2967 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2968 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2969 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2970 			    PIPE_CONFIG(ADDR_SURF_P2));
2971 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2972 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2973 			    PIPE_CONFIG(ADDR_SURF_P2) |
2974 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2975 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2976 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2977 			    PIPE_CONFIG(ADDR_SURF_P2) |
2978 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2979 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2980 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2981 			    PIPE_CONFIG(ADDR_SURF_P2) |
2982 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2983 
2984 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2985 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2986 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2987 				NUM_BANKS(ADDR_SURF_16_BANK));
2988 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2989 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2990 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991 				NUM_BANKS(ADDR_SURF_16_BANK));
2992 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2993 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2994 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995 				NUM_BANKS(ADDR_SURF_16_BANK));
2996 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2997 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2998 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999 				NUM_BANKS(ADDR_SURF_16_BANK));
3000 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3002 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003 				NUM_BANKS(ADDR_SURF_16_BANK));
3004 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3006 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007 				NUM_BANKS(ADDR_SURF_16_BANK));
3008 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3011 				NUM_BANKS(ADDR_SURF_8_BANK));
3012 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3013 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3014 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3015 				NUM_BANKS(ADDR_SURF_16_BANK));
3016 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3017 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3018 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3019 				NUM_BANKS(ADDR_SURF_16_BANK));
3020 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3021 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3022 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3023 				NUM_BANKS(ADDR_SURF_16_BANK));
3024 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3025 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3026 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027 				NUM_BANKS(ADDR_SURF_16_BANK));
3028 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3030 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031 				NUM_BANKS(ADDR_SURF_16_BANK));
3032 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3033 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3034 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3035 				NUM_BANKS(ADDR_SURF_16_BANK));
3036 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3038 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3039 				NUM_BANKS(ADDR_SURF_8_BANK));
3040 
3041 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3042 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3043 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3044 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3045 		break;
3046 
3047 	default:
3048 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3049 	}
3050 }
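/*
 * Example, derived from cik_gpu_init() below: Bonaire has
 * max_tile_pipes = 4 and num_rbs = max_backends_per_se (2) *
 * max_shader_engines (2) = 4, so it takes the "case 4"/num_rbs == 4
 * branch above with the ADDR_SURF_P4_16x16 pipe config, while Hawaii
 * (16 tile pipes) takes "case 16".
 */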
3051 
3052 /**
3053  * cik_select_se_sh - select which SE, SH to address
3054  *
3055  * @rdev: radeon_device pointer
3056  * @se_num: shader engine to address
3057  * @sh_num: sh block to address
3058  *
3059  * Select which SE, SH combinations to address. Certain
3060  * registers are instanced per SE or SH.  0xffffffff means
3061  * broadcast to all SEs or SHs (CIK).
3062  */
3063 static void cik_select_se_sh(struct radeon_device *rdev,
3064 			     u32 se_num, u32 sh_num)
3065 {
3066 	u32 data = INSTANCE_BROADCAST_WRITES;
3067 
3068 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3069 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3070 	else if (se_num == 0xffffffff)
3071 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3072 	else if (sh_num == 0xffffffff)
3073 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3074 	else
3075 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3076 	WREG32(GRBM_GFX_INDEX, data);
3077 }
3078 
3079 /**
3080  * cik_create_bitmask - create a bitmask
3081  *
3082  * @bit_width: length of the mask
3083  *
3084  * Create a variable-length bit mask (CIK).
3085  * Returns the bitmask.
3086  */
3087 static u32 cik_create_bitmask(u32 bit_width)
3088 {
3089 	u32 i, mask = 0;
3090 
3091 	for (i = 0; i < bit_width; i++) {
3092 		mask <<= 1;
3093 		mask |= 1;
3094 	}
3095 	return mask;
3096 }
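/*
 * Equivalent closed form (sketch): for bit_width < 32 this is simply
 * (1u << bit_width) - 1; the loop is used so that bit_width == 32 yields
 * 0xffffffff without relying on an undefined 32-bit shift.
 */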
3097 
3098 /**
3099  * cik_get_rb_disabled - computes the mask of disabled RBs
3100  *
3101  * @rdev: radeon_device pointer
3102  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3103  * @sh_per_se: number of SH blocks per SE for the asic
3105  *
3106  * Calculates the bitmask of disabled RBs (CIK).
3107  * Returns the disabled RB bitmask.
3108  */
3109 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3110 			      u32 max_rb_num_per_se,
3111 			      u32 sh_per_se)
3112 {
3113 	u32 data, mask;
3114 
3115 	data = RREG32(CC_RB_BACKEND_DISABLE);
3116 	if (data & 1)
3117 		data &= BACKEND_DISABLE_MASK;
3118 	else
3119 		data = 0;
3120 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3121 
3122 	data >>= BACKEND_DISABLE_SHIFT;
3123 
3124 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3125 
3126 	return data & mask;
3127 }
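/*
 * Example: on Bonaire (max_backends_per_se = 2, sh_per_se = 1) the mask
 * is cik_create_bitmask(2) = 0x3, so at most two RB-disable bits are
 * kept per SE/SH instance.
 */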
3128 
3129 /**
3130  * cik_setup_rb - setup the RBs on the asic
3131  *
3132  * @rdev: radeon_device pointer
3133  * @se_num: number of SEs (shader engines) for the asic
3134  * @sh_per_se: number of SH blocks per SE for the asic
3135  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3136  *
3137  * Configures per-SE/SH RB registers (CIK).
3138  */
3139 static void cik_setup_rb(struct radeon_device *rdev,
3140 			 u32 se_num, u32 sh_per_se,
3141 			 u32 max_rb_num_per_se)
3142 {
3143 	int i, j;
3144 	u32 data, mask;
3145 	u32 disabled_rbs = 0;
3146 	u32 enabled_rbs = 0;
3147 
3148 	mutex_lock(&rdev->grbm_idx_mutex);
3149 	for (i = 0; i < se_num; i++) {
3150 		for (j = 0; j < sh_per_se; j++) {
3151 			cik_select_se_sh(rdev, i, j);
3152 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3153 			if (rdev->family == CHIP_HAWAII)
3154 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3155 			else
3156 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3157 		}
3158 	}
3159 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3160 	mutex_unlock(&rdev->grbm_idx_mutex);
3161 
3162 	mask = 1;
3163 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3164 		if (!(disabled_rbs & mask))
3165 			enabled_rbs |= mask;
3166 		mask <<= 1;
3167 	}
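
	/* Example: on a 2-SE part with 2 RBs per SE and nothing disabled,
	 * the loop above scans 4 bits and enabled_rbs ends up 0xf.
	 */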
3168 
3169 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3170 
3171 	mutex_lock(&rdev->grbm_idx_mutex);
3172 	for (i = 0; i < se_num; i++) {
3173 		cik_select_se_sh(rdev, i, 0xffffffff);
3174 		data = 0;
3175 		for (j = 0; j < sh_per_se; j++) {
3176 			switch (enabled_rbs & 3) {
3177 			case 0:
3178 				if (j == 0)
3179 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3180 				else
3181 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3182 				break;
3183 			case 1:
3184 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3185 				break;
3186 			case 2:
3187 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3188 				break;
3189 			case 3:
3190 			default:
3191 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3192 				break;
3193 			}
3194 			enabled_rbs >>= 2;
3195 		}
3196 		WREG32(PA_SC_RASTER_CONFIG, data);
3197 	}
3198 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3199 	mutex_unlock(&rdev->grbm_idx_mutex);
3200 }
3201 
3202 /**
3203  * cik_gpu_init - setup the 3D engine
3204  *
3205  * @rdev: radeon_device pointer
3206  *
3207  * Configures the 3D engine and tiling configuration
3208  * registers so that the 3D engine is usable.
3209  */
3210 static void cik_gpu_init(struct radeon_device *rdev)
3211 {
3212 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3213 	u32 mc_shared_chmap, mc_arb_ramcfg;
3214 	u32 hdp_host_path_cntl;
3215 	u32 tmp;
3216 	int i, j;
3217 
3218 	switch (rdev->family) {
3219 	case CHIP_BONAIRE:
3220 		rdev->config.cik.max_shader_engines = 2;
3221 		rdev->config.cik.max_tile_pipes = 4;
3222 		rdev->config.cik.max_cu_per_sh = 7;
3223 		rdev->config.cik.max_sh_per_se = 1;
3224 		rdev->config.cik.max_backends_per_se = 2;
3225 		rdev->config.cik.max_texture_channel_caches = 4;
3226 		rdev->config.cik.max_gprs = 256;
3227 		rdev->config.cik.max_gs_threads = 32;
3228 		rdev->config.cik.max_hw_contexts = 8;
3229 
3230 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3231 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3232 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3233 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3234 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3235 		break;
3236 	case CHIP_HAWAII:
3237 		rdev->config.cik.max_shader_engines = 4;
3238 		rdev->config.cik.max_tile_pipes = 16;
3239 		rdev->config.cik.max_cu_per_sh = 11;
3240 		rdev->config.cik.max_sh_per_se = 1;
3241 		rdev->config.cik.max_backends_per_se = 4;
3242 		rdev->config.cik.max_texture_channel_caches = 16;
3243 		rdev->config.cik.max_gprs = 256;
3244 		rdev->config.cik.max_gs_threads = 32;
3245 		rdev->config.cik.max_hw_contexts = 8;
3246 
3247 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3248 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3249 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3250 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3251 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3252 		break;
3253 	case CHIP_KAVERI:
3254 		rdev->config.cik.max_shader_engines = 1;
3255 		rdev->config.cik.max_tile_pipes = 4;
3256 		rdev->config.cik.max_cu_per_sh = 8;
3257 		rdev->config.cik.max_backends_per_se = 2;
3258 		rdev->config.cik.max_sh_per_se = 1;
3259 		rdev->config.cik.max_texture_channel_caches = 4;
3260 		rdev->config.cik.max_gprs = 256;
3261 		rdev->config.cik.max_gs_threads = 16;
3262 		rdev->config.cik.max_hw_contexts = 8;
3263 
3264 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3265 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3266 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3267 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3268 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3269 		break;
3270 	case CHIP_KABINI:
3271 	case CHIP_MULLINS:
3272 	default:
3273 		rdev->config.cik.max_shader_engines = 1;
3274 		rdev->config.cik.max_tile_pipes = 2;
3275 		rdev->config.cik.max_cu_per_sh = 2;
3276 		rdev->config.cik.max_sh_per_se = 1;
3277 		rdev->config.cik.max_backends_per_se = 1;
3278 		rdev->config.cik.max_texture_channel_caches = 2;
3279 		rdev->config.cik.max_gprs = 256;
3280 		rdev->config.cik.max_gs_threads = 16;
3281 		rdev->config.cik.max_hw_contexts = 8;
3282 
3283 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3284 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3285 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3286 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3287 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3288 		break;
3289 	}
3290 
3291 	/* Initialize HDP */
3292 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3293 		WREG32((0x2c14 + j), 0x00000000);
3294 		WREG32((0x2c18 + j), 0x00000000);
3295 		WREG32((0x2c1c + j), 0x00000000);
3296 		WREG32((0x2c20 + j), 0x00000000);
3297 		WREG32((0x2c24 + j), 0x00000000);
3298 	}
3299 
3300 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3301 	WREG32(SRBM_INT_CNTL, 0x1);
3302 	WREG32(SRBM_INT_ACK, 0x1);
3303 
3304 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3305 
3306 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3307 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3308 
3309 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3310 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3311 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3312 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3313 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3314 		rdev->config.cik.mem_row_size_in_kb = 4;
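	/* Worked example: NOOFCOLS = 0 gives 4 * 2^8 / 1024 = 1 KB rows;
	 * NOOFCOLS = 2 gives 4 * 2^10 / 1024 = 4 KB, the clamp ceiling.
	 */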
3315 	/* XXX use MC settings? */
3316 	rdev->config.cik.shader_engine_tile_size = 32;
3317 	rdev->config.cik.num_gpus = 1;
3318 	rdev->config.cik.multi_gpu_tile_size = 64;
3319 
3320 	/* fix up row size */
3321 	gb_addr_config &= ~ROW_SIZE_MASK;
3322 	switch (rdev->config.cik.mem_row_size_in_kb) {
3323 	case 1:
3324 	default:
3325 		gb_addr_config |= ROW_SIZE(0);
3326 		break;
3327 	case 2:
3328 		gb_addr_config |= ROW_SIZE(1);
3329 		break;
3330 	case 4:
3331 		gb_addr_config |= ROW_SIZE(2);
3332 		break;
3333 	}
3334 
3335 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3336 	 * not have bank info, so create a custom tiling dword.
3337 	 * bits 3:0   num_pipes
3338 	 * bits 7:4   num_banks
3339 	 * bits 11:8  group_size
3340 	 * bits 15:12 row_size
3341 	 */
3342 	rdev->config.cik.tile_config = 0;
3343 	switch (rdev->config.cik.num_tile_pipes) {
3344 	case 1:
3345 		rdev->config.cik.tile_config |= (0 << 0);
3346 		break;
3347 	case 2:
3348 		rdev->config.cik.tile_config |= (1 << 0);
3349 		break;
3350 	case 4:
3351 		rdev->config.cik.tile_config |= (2 << 0);
3352 		break;
3353 	case 8:
3354 	default:
3355 		/* XXX what about 12? */
3356 		rdev->config.cik.tile_config |= (3 << 0);
3357 		break;
3358 	}
3359 	rdev->config.cik.tile_config |=
3360 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3361 	rdev->config.cik.tile_config |=
3362 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3363 	rdev->config.cik.tile_config |=
3364 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
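	/* Illustrative decode: a 4-pipe part with 4 KB rows ends up with
	 * 0x2 in bits 3:0 (num_pipes, per the switch above) and 0x2 in
	 * bits 15:12 (ROW_SIZE(2)); banks and group size come straight
	 * from the NOOFBANK and PIPE_INTERLEAVE_SIZE fields.
	 */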
3365 
3366 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3367 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3368 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3369 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3370 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3371 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3372 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3373 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3374 
3375 	cik_tiling_mode_table_init(rdev);
3376 
3377 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3378 		     rdev->config.cik.max_sh_per_se,
3379 		     rdev->config.cik.max_backends_per_se);
3380 
3381 	rdev->config.cik.active_cus = 0;
3382 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3383 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3384 			rdev->config.cik.active_cus +=
3385 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3386 		}
3387 	}
3388 
3389 	/* set HW defaults for 3D engine */
3390 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3391 
3392 	mutex_lock(&rdev->grbm_idx_mutex);
3393 	/*
3394 	 * make sure that the following register writes are broadcast
3395 	 * to all the shaders
3396 	 */
3397 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3398 	WREG32(SX_DEBUG_1, 0x20);
3399 
3400 	WREG32(TA_CNTL_AUX, 0x00010000);
3401 
3402 	tmp = RREG32(SPI_CONFIG_CNTL);
3403 	tmp |= 0x03000000;
3404 	WREG32(SPI_CONFIG_CNTL, tmp);
3405 
3406 	WREG32(SQ_CONFIG, 1);
3407 
3408 	WREG32(DB_DEBUG, 0);
3409 
3410 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3411 	tmp |= 0x00000400;
3412 	WREG32(DB_DEBUG2, tmp);
3413 
3414 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3415 	tmp |= 0x00020200;
3416 	WREG32(DB_DEBUG3, tmp);
3417 
3418 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3419 	tmp |= 0x00018208;
3420 	WREG32(CB_HW_CONTROL, tmp);
3421 
3422 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3423 
3424 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3425 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3426 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3427 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3428 
3429 	WREG32(VGT_NUM_INSTANCES, 1);
3430 
3431 	WREG32(CP_PERFMON_CNTL, 0);
3432 
3433 	WREG32(SQ_CONFIG, 0);
3434 
3435 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3436 					  FORCE_EOV_MAX_REZ_CNT(255)));
3437 
3438 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3439 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3440 
3441 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3442 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3443 
3444 	tmp = RREG32(HDP_MISC_CNTL);
3445 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3446 	WREG32(HDP_MISC_CNTL, tmp);
3447 
3448 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3449 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3450 
3451 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3452 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3453 	mutex_unlock(&rdev->grbm_idx_mutex);
3454 
3455 	udelay(50);
3456 }
3457 
3458 /*
3459  * GPU scratch register helper functions.
3460  */
3461 /**
3462  * cik_scratch_init - setup driver info for CP scratch regs
3463  *
3464  * @rdev: radeon_device pointer
3465  *
3466  * Set up the number and offset of the CP scratch registers.
3467  * NOTE: use of CP scratch registers is a legacy interface and
3468  * is not used by default on newer asics (r6xx+).  On newer asics,
3469  * memory buffers are used for fences rather than scratch regs.
3470  */
3471 static void cik_scratch_init(struct radeon_device *rdev)
3472 {
3473 	int i;
3474 
3475 	rdev->scratch.num_reg = 7;
3476 	rdev->scratch.reg_base = SCRATCH_REG0;
3477 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3478 		rdev->scratch.free[i] = true;
3479 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3480 	}
3481 }
3482 
3483 /**
3484  * cik_ring_test - basic gfx ring test
3485  *
3486  * @rdev: radeon_device pointer
3487  * @ring: radeon_ring structure holding ring information
3488  *
3489  * Allocate a scratch register and write to it using the gfx ring (CIK).
3490  * Provides a basic gfx ring test to verify that the ring is working.
3491  * Used by cik_cp_gfx_resume().
3492  * Returns 0 on success, error on failure.
3493  */
3494 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3495 {
3496 	uint32_t scratch;
3497 	uint32_t tmp = 0;
3498 	unsigned i;
3499 	int r;
3500 
3501 	r = radeon_scratch_get(rdev, &scratch);
3502 	if (r) {
3503 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3504 		return r;
3505 	}
3506 	WREG32(scratch, 0xCAFEDEAD);
3507 	r = radeon_ring_lock(rdev, ring, 3);
3508 	if (r) {
3509 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3510 		radeon_scratch_free(rdev, scratch);
3511 		return r;
3512 	}
3513 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3514 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3515 	radeon_ring_write(ring, 0xDEADBEEF);
3516 	radeon_ring_unlock_commit(rdev, ring, false);
3517 
3518 	for (i = 0; i < rdev->usec_timeout; i++) {
3519 		tmp = RREG32(scratch);
3520 		if (tmp == 0xDEADBEEF)
3521 			break;
3522 		DRM_UDELAY(1);
3523 	}
3524 	if (i < rdev->usec_timeout) {
3525 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3526 	} else {
3527 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3528 			  ring->idx, scratch, tmp);
3529 		r = -EINVAL;
3530 	}
3531 	radeon_scratch_free(rdev, scratch);
3532 	return r;
3533 }
3534 
3535 /**
3536  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3537  *
3538  * @rdev: radeon_device pointer
3539  * @ridx: radeon ring index
3540  *
3541  * Emits an hdp flush on the cp.
3542  */
3543 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3544 				       int ridx)
3545 {
3546 	struct radeon_ring *ring = &rdev->ring[ridx];
3547 	u32 ref_and_mask;
3548 
3549 	switch (ring->idx) {
3550 	case CAYMAN_RING_TYPE_CP1_INDEX:
3551 	case CAYMAN_RING_TYPE_CP2_INDEX:
3552 	default:
3553 		switch (ring->me) {
3554 		case 0:
3555 			ref_and_mask = CP2 << ring->pipe;
3556 			break;
3557 		case 1:
3558 			ref_and_mask = CP6 << ring->pipe;
3559 			break;
3560 		default:
3561 			return;
3562 		}
3563 		break;
3564 	case RADEON_RING_TYPE_GFX_INDEX:
3565 		ref_and_mask = CP0;
3566 		break;
3567 	}
3568 
3569 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3570 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3571 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3572 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3573 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3574 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3575 	radeon_ring_write(ring, ref_and_mask);
3576 	radeon_ring_write(ring, ref_and_mask);
3577 	radeon_ring_write(ring, 0x20); /* poll interval */
3578 }
3579 
3580 /**
3581  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3582  *
3583  * @rdev: radeon_device pointer
3584  * @fence: radeon fence object
3585  *
3586  * Emits a fence sequence number on the gfx ring and flushes
3587  * GPU caches.
3588  */
3589 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3590 			     struct radeon_fence *fence)
3591 {
3592 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3593 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3594 
3595 	/* Workaround for cache flush problems: first send a dummy EOP
3596 	 * event down the pipe with a sequence number one below the real one.
3597 	 */
3598 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3599 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3600 				 EOP_TC_ACTION_EN |
3601 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3602 				 EVENT_INDEX(5)));
3603 	radeon_ring_write(ring, addr & 0xfffffffc);
3604 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3605 				DATA_SEL(1) | INT_SEL(0));
3606 	radeon_ring_write(ring, fence->seq - 1);
3607 	radeon_ring_write(ring, 0);
3608 
3609 	/* Then send the real EOP event down the pipe. */
3610 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3611 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3612 				 EOP_TC_ACTION_EN |
3613 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3614 				 EVENT_INDEX(5)));
3615 	radeon_ring_write(ring, addr & 0xfffffffc);
3616 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3617 	radeon_ring_write(ring, fence->seq);
3618 	radeon_ring_write(ring, 0);
3619 }
3620 
3621 /**
3622  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3623  *
3624  * @rdev: radeon_device pointer
3625  * @fence: radeon fence object
3626  *
3627  * Emits a fence sequence number on the compute ring and flushes
3628  * GPU caches.
3629  */
3630 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3631 				 struct radeon_fence *fence)
3632 {
3633 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3634 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3635 
3636 	/* RELEASE_MEM - flush caches, send int */
3637 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3638 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3639 				 EOP_TC_ACTION_EN |
3640 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3641 				 EVENT_INDEX(5)));
3642 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3643 	radeon_ring_write(ring, addr & 0xfffffffc);
3644 	radeon_ring_write(ring, upper_32_bits(addr));
3645 	radeon_ring_write(ring, fence->seq);
3646 	radeon_ring_write(ring, 0);
3647 }
3648 
3649 /**
3650  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3651  *
3652  * @rdev: radeon_device pointer
3653  * @ring: radeon ring buffer object
3654  * @semaphore: radeon semaphore object
3655  * @emit_wait: Is this a semaphore wait?
3656  *
3657  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3658  * from running ahead of semaphore waits.
3659  */
3660 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3661 			     struct radeon_ring *ring,
3662 			     struct radeon_semaphore *semaphore,
3663 			     bool emit_wait)
3664 {
3665 	uint64_t addr = semaphore->gpu_addr;
3666 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3667 
3668 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3669 	radeon_ring_write(ring, lower_32_bits(addr));
3670 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3671 
3672 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3673 		/* Prevent the PFP from running ahead of the semaphore wait */
3674 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3675 		radeon_ring_write(ring, 0x0);
3676 	}
3677 
3678 	return true;
3679 }
3680 
3681 /**
3682  * cik_copy_cpdma - copy pages using the CP DMA engine
3683  *
3684  * @rdev: radeon_device pointer
3685  * @src_offset: src GPU address
3686  * @dst_offset: dst GPU address
3687  * @num_gpu_pages: number of GPU pages to xfer
3688  * @resv: reservation object to sync to
3689  *
3690  * Copy GPU pages using the CP DMA engine (CIK+).
3691  * Used by the radeon ttm implementation to move pages if
3692  * registered as the asic copy callback.
3693  */
3694 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3695 				    uint64_t src_offset, uint64_t dst_offset,
3696 				    unsigned num_gpu_pages,
3697 				    struct reservation_object *resv)
3698 {
3699 	struct radeon_fence *fence;
3700 	struct radeon_sync sync;
3701 	int ring_index = rdev->asic->copy.blit_ring_index;
3702 	struct radeon_ring *ring = &rdev->ring[ring_index];
3703 	u32 size_in_bytes, cur_size_in_bytes, control;
3704 	int i, num_loops;
3705 	int r = 0;
3706 
3707 	radeon_sync_create(&sync);
3708 
3709 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3710 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
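	/* Each DMA_DATA packet below is 7 dwords and moves at most
	 * 0x1fffff bytes, so e.g. a 2048-page (8 MiB, assuming 4 KB GPU
	 * pages) copy needs DIV_ROUND_UP(0x800000, 0x1fffff) = 5 packets
	 * plus the 18 dwords of fixed overhead reserved here.
	 */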
3711 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3712 	if (r) {
3713 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3714 		radeon_sync_free(rdev, &sync, NULL);
3715 		return ERR_PTR(r);
3716 	}
3717 
3718 	radeon_sync_resv(rdev, &sync, resv, false);
3719 	radeon_sync_rings(rdev, &sync, ring->idx);
3720 
3721 	for (i = 0; i < num_loops; i++) {
3722 		cur_size_in_bytes = size_in_bytes;
3723 		if (cur_size_in_bytes > 0x1fffff)
3724 			cur_size_in_bytes = 0x1fffff;
3725 		size_in_bytes -= cur_size_in_bytes;
3726 		control = 0;
3727 		if (size_in_bytes == 0)
3728 			control |= PACKET3_DMA_DATA_CP_SYNC;
3729 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3730 		radeon_ring_write(ring, control);
3731 		radeon_ring_write(ring, lower_32_bits(src_offset));
3732 		radeon_ring_write(ring, upper_32_bits(src_offset));
3733 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3734 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3735 		radeon_ring_write(ring, cur_size_in_bytes);
3736 		src_offset += cur_size_in_bytes;
3737 		dst_offset += cur_size_in_bytes;
3738 	}
3739 
3740 	r = radeon_fence_emit(rdev, &fence, ring->idx);
3741 	if (r) {
3742 		radeon_ring_unlock_undo(rdev, ring);
3743 		radeon_sync_free(rdev, &sync, NULL);
3744 		return ERR_PTR(r);
3745 	}
3746 
3747 	radeon_ring_unlock_commit(rdev, ring, false);
3748 	radeon_sync_free(rdev, &sync, fence);
3749 
3750 	return fence;
3751 }
3752 
3753 /*
3754  * IB stuff
3755  */
3756 /**
3757  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3758  *
3759  * @rdev: radeon_device pointer
3760  * @ib: radeon indirect buffer object
3761  *
3762  * Emits a DE (drawing engine) or CE (constant engine) IB
3763  * on the gfx ring.  IBs are usually generated by userspace
3764  * acceleration drivers and submitted to the kernel for
3765  * scheduling on the ring.  This function schedules the IB
3766  * on the gfx ring for execution by the GPU.
3767  */
3768 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3769 {
3770 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3771 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3772 	u32 header, control = INDIRECT_BUFFER_VALID;
3773 
3774 	if (ib->is_const_ib) {
3775 		/* set switch buffer packet before const IB */
3776 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3777 		radeon_ring_write(ring, 0);
3778 
3779 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3780 	} else {
3781 		u32 next_rptr;
3782 		if (ring->rptr_save_reg) {
3783 			next_rptr = ring->wptr + 3 + 4;
3784 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3785 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3786 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3787 			radeon_ring_write(ring, next_rptr);
3788 		} else if (rdev->wb.enabled) {
3789 			next_rptr = ring->wptr + 5 + 4;
3790 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3791 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3792 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3793 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3794 			radeon_ring_write(ring, next_rptr);
3795 		}
3796 
3797 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3798 	}
3799 
3800 	control |= ib->length_dw | (vm_id << 24);
3801 
3802 	radeon_ring_write(ring, header);
3803 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3804 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3805 	radeon_ring_write(ring, control);
3806 }
3807 
3808 /**
3809  * cik_ib_test - basic gfx ring IB test
3810  *
3811  * @rdev: radeon_device pointer
3812  * @ring: radeon_ring structure holding ring information
3813  *
3814  * Allocate an IB and execute it on the gfx ring (CIK).
3815  * Provides a basic gfx ring test to verify that IBs are working.
3816  * Returns 0 on success, error on failure.
3817  */
3818 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3819 {
3820 	struct radeon_ib ib;
3821 	uint32_t scratch;
3822 	uint32_t tmp = 0;
3823 	unsigned i;
3824 	int r;
3825 
3826 	r = radeon_scratch_get(rdev, &scratch);
3827 	if (r) {
3828 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3829 		return r;
3830 	}
3831 	WREG32(scratch, 0xCAFEDEAD);
3832 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3833 	if (r) {
3834 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3835 		radeon_scratch_free(rdev, scratch);
3836 		return r;
3837 	}
3838 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3839 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3840 	ib.ptr[2] = 0xDEADBEEF;
3841 	ib.length_dw = 3;
3842 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3843 	if (r) {
3844 		radeon_scratch_free(rdev, scratch);
3845 		radeon_ib_free(rdev, &ib);
3846 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3847 		return r;
3848 	}
3849 	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3850 		RADEON_USEC_IB_TEST_TIMEOUT));
3851 	if (r < 0) {
3852 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3853 		radeon_scratch_free(rdev, scratch);
3854 		radeon_ib_free(rdev, &ib);
3855 		return r;
3856 	} else if (r == 0) {
3857 		DRM_ERROR("radeon: fence wait timed out.\n");
3858 		radeon_scratch_free(rdev, scratch);
3859 		radeon_ib_free(rdev, &ib);
3860 		return -ETIMEDOUT;
3861 	}
3862 	r = 0;
3863 	for (i = 0; i < rdev->usec_timeout; i++) {
3864 		tmp = RREG32(scratch);
3865 		if (tmp == 0xDEADBEEF)
3866 			break;
3867 		DRM_UDELAY(1);
3868 	}
3869 	if (i < rdev->usec_timeout) {
3870 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3871 	} else {
3872 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3873 			  scratch, tmp);
3874 		r = -EINVAL;
3875 	}
3876 	radeon_scratch_free(rdev, scratch);
3877 	radeon_ib_free(rdev, &ib);
3878 	return r;
3879 }
3880 
3881 /*
3882  * CP.
3883  * On CIK, gfx and compute now have independent command processors.
3884  *
3885  * GFX
3886  * Gfx consists of a single ring and can process both gfx jobs and
3887  * compute jobs.  The gfx CP consists of three microengines (ME):
3888  * PFP - Pre-Fetch Parser
3889  * ME - Micro Engine
3890  * CE - Constant Engine
3891  * The PFP and ME make up what is considered the Drawing Engine (DE).
3892  * The CE is an asynchronous engine used for updating buffer descriptors
3893  * used by the DE so that they can be loaded into cache in parallel
3894  * while the DE is processing state update packets.
3895  *
3896  * Compute
3897  * The compute CP consists of two microengines (ME):
3898  * MEC1 - Compute MicroEngine 1
3899  * MEC2 - Compute MicroEngine 2
3900  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3901  * The queues are exposed to userspace and are programmed directly
3902  * by the compute runtime.
3903  */
3904 /**
3905  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3906  *
3907  * @rdev: radeon_device pointer
3908  * @enable: enable or disable the MEs
3909  *
3910  * Halts or unhalts the gfx MEs.
3911  */
3912 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3913 {
3914 	if (enable) {
3915 		WREG32(CP_ME_CNTL, 0);
3916 	} else {
3917 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3918 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3919 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3920 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3921 	}
3922 	udelay(50);
3923 }
3924 
3925 /**
3926  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3927  *
3928  * @rdev: radeon_device pointer
3929  *
3930  * Loads the gfx PFP, ME, and CE ucode.
3931  * Returns 0 for success, -EINVAL if the ucode is not available.
3932  */
3933 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3934 {
3935 	int i;
3936 
3937 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3938 		return -EINVAL;
3939 
3940 	cik_cp_gfx_enable(rdev, false);
3941 
3942 	if (rdev->new_fw) {
3943 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3944 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3945 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3946 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3947 		const struct gfx_firmware_header_v1_0 *me_hdr =
3948 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3949 		const __le32 *fw_data;
3950 		u32 fw_size;
3951 
3952 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3953 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3954 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3955 
3956 		/* PFP */
3957 		fw_data = (const __le32 *)
3958 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3959 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3960 		WREG32(CP_PFP_UCODE_ADDR, 0);
3961 		for (i = 0; i < fw_size; i++)
3962 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3963 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3964 
3965 		/* CE */
3966 		fw_data = (const __le32 *)
3967 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3968 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3969 		WREG32(CP_CE_UCODE_ADDR, 0);
3970 		for (i = 0; i < fw_size; i++)
3971 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3972 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3973 
3974 		/* ME */
3975 		fw_data = (const __le32 *)
3976 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3977 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3978 		WREG32(CP_ME_RAM_WADDR, 0);
3979 		for (i = 0; i < fw_size; i++)
3980 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3981 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3982 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3983 	} else {
3984 		const __be32 *fw_data;
3985 
3986 		/* PFP */
3987 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3988 		WREG32(CP_PFP_UCODE_ADDR, 0);
3989 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3990 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3991 		WREG32(CP_PFP_UCODE_ADDR, 0);
3992 
3993 		/* CE */
3994 		fw_data = (const __be32 *)rdev->ce_fw->data;
3995 		WREG32(CP_CE_UCODE_ADDR, 0);
3996 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3997 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3998 		WREG32(CP_CE_UCODE_ADDR, 0);
3999 
4000 		/* ME */
4001 		fw_data = (const __be32 *)rdev->me_fw->data;
4002 		WREG32(CP_ME_RAM_WADDR, 0);
4003 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4004 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4005 		WREG32(CP_ME_RAM_WADDR, 0);
4006 	}
4007 
4008 	return 0;
4009 }
4010 
4011 /**
4012  * cik_cp_gfx_start - start the gfx ring
4013  *
4014  * @rdev: radeon_device pointer
4015  *
4016  * Enables the ring and loads the clear state context and other
4017  * packets required to init the ring.
4018  * Returns 0 for success, error for failure.
4019  */
4020 static int cik_cp_gfx_start(struct radeon_device *rdev)
4021 {
4022 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4023 	int r, i;
4024 
4025 	/* init the CP */
4026 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4027 	WREG32(CP_ENDIAN_SWAP, 0);
4028 	WREG32(CP_DEVICE_ID, 1);
4029 
4030 	cik_cp_gfx_enable(rdev, true);
4031 
4032 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4033 	if (r) {
4034 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4035 		return r;
4036 	}
4037 
4038 	/* init the CE partitions.  CE only used for gfx on CIK */
4039 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4040 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4041 	radeon_ring_write(ring, 0x8000);
4042 	radeon_ring_write(ring, 0x8000);
4043 
4044 	/* setup clear context state */
4045 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4046 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4047 
4048 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4049 	radeon_ring_write(ring, 0x80000000);
4050 	radeon_ring_write(ring, 0x80000000);
4051 
4052 	for (i = 0; i < cik_default_size; i++)
4053 		radeon_ring_write(ring, cik_default_state[i]);
4054 
4055 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4056 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4057 
4058 	/* set clear context state */
4059 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4060 	radeon_ring_write(ring, 0);
4061 
4062 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4063 	radeon_ring_write(ring, 0x00000316);
4064 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4065 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4066 
4067 	radeon_ring_unlock_commit(rdev, ring, false);
4068 
4069 	return 0;
4070 }
4071 
4072 /**
4073  * cik_cp_gfx_fini - stop the gfx ring
4074  *
4075  * @rdev: radeon_device pointer
4076  *
4077  * Stop the gfx ring and tear down the driver ring
4078  * info.
4079  */
4080 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4081 {
4082 	cik_cp_gfx_enable(rdev, false);
4083 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4084 }
4085 
4086 /**
4087  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4088  *
4089  * @rdev: radeon_device pointer
4090  *
4091  * Program the location and size of the gfx ring buffer
4092  * and test it to make sure it's working.
4093  * Returns 0 for success, error for failure.
4094  */
4095 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4096 {
4097 	struct radeon_ring *ring;
4098 	u32 tmp;
4099 	u32 rb_bufsz;
4100 	u64 rb_addr;
4101 	int r;
4102 
4103 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4104 	if (rdev->family != CHIP_HAWAII)
4105 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4106 
4107 	/* Set the write pointer delay */
4108 	WREG32(CP_RB_WPTR_DELAY, 0);
4109 
4110 	/* set the RB to use vmid 0 */
4111 	WREG32(CP_RB_VMID, 0);
4112 
4113 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4114 
4115 	/* ring 0 - compute and gfx */
4116 	/* Set ring buffer size */
4117 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
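	/* The low field of CP_RB0_CNTL is log2 of the ring size in qwords:
	 * e.g. a 1 MiB ring gives order_base_2(1048576 / 8) = 17.  The
	 * value shifted in at bit 8 is log2 of the GPU page size in qwords
	 * (order_base_2(4096 / 8) = 9, assuming 4 KB GPU pages).
	 */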
4118 	rb_bufsz = order_base_2(ring->ring_size / 8);
4119 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4120 #ifdef __BIG_ENDIAN
4121 	tmp |= BUF_SWAP_32BIT;
4122 #endif
4123 	WREG32(CP_RB0_CNTL, tmp);
4124 
4125 	/* Initialize the ring buffer's read and write pointers */
4126 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4127 	ring->wptr = 0;
4128 	WREG32(CP_RB0_WPTR, ring->wptr);
4129 
4130 	/* set the wb address whether it's enabled or not */
4131 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4132 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4133 
4134 	/* scratch register shadowing is no longer supported */
4135 	WREG32(SCRATCH_UMSK, 0);
4136 
4137 	if (!rdev->wb.enabled)
4138 		tmp |= RB_NO_UPDATE;
4139 
4140 	mdelay(1);
4141 	WREG32(CP_RB0_CNTL, tmp);
4142 
4143 	rb_addr = ring->gpu_addr >> 8;
4144 	WREG32(CP_RB0_BASE, rb_addr);
4145 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4146 
4147 	/* start the ring */
4148 	cik_cp_gfx_start(rdev);
4149 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4150 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4151 	if (r) {
4152 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4153 		return r;
4154 	}
4155 
4156 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4157 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4158 
4159 	return 0;
4160 }
4161 
4162 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4163 		     struct radeon_ring *ring)
4164 {
4165 	u32 rptr;
4166 
4167 	if (rdev->wb.enabled)
4168 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4169 	else
4170 		rptr = RREG32(CP_RB0_RPTR);
4171 
4172 	return rptr;
4173 }
4174 
4175 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4176 		     struct radeon_ring *ring)
4177 {
4178 	return RREG32(CP_RB0_WPTR);
4179 }
4180 
4181 void cik_gfx_set_wptr(struct radeon_device *rdev,
4182 		      struct radeon_ring *ring)
4183 {
4184 	WREG32(CP_RB0_WPTR, ring->wptr);
4185 	(void)RREG32(CP_RB0_WPTR);
4186 }
4187 
4188 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4189 			 struct radeon_ring *ring)
4190 {
4191 	u32 rptr;
4192 
4193 	if (rdev->wb.enabled) {
4194 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4195 	} else {
4196 		mutex_lock(&rdev->srbm_mutex);
4197 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4198 		rptr = RREG32(CP_HQD_PQ_RPTR);
4199 		cik_srbm_select(rdev, 0, 0, 0, 0);
4200 		mutex_unlock(&rdev->srbm_mutex);
4201 	}
4202 
4203 	return rptr;
4204 }
4205 
4206 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4207 			 struct radeon_ring *ring)
4208 {
4209 	u32 wptr;
4210 
4211 	if (rdev->wb.enabled) {
4212 		/* XXX check if swapping is necessary on BE */
4213 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4214 	} else {
4215 		mutex_lock(&rdev->srbm_mutex);
4216 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4217 		wptr = RREG32(CP_HQD_PQ_WPTR);
4218 		cik_srbm_select(rdev, 0, 0, 0, 0);
4219 		mutex_unlock(&rdev->srbm_mutex);
4220 	}
4221 
4222 	return wptr;
4223 }
4224 
4225 void cik_compute_set_wptr(struct radeon_device *rdev,
4226 			  struct radeon_ring *ring)
4227 {
4228 	/* XXX check if swapping is necessary on BE */
4229 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4230 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4231 }
4232 
4233 static void cik_compute_stop(struct radeon_device *rdev,
4234 			     struct radeon_ring *ring)
4235 {
4236 	u32 j, tmp;
4237 
4238 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4239 	/* Disable wptr polling. */
4240 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4241 	tmp &= ~WPTR_POLL_EN;
4242 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4243 	/* Disable HQD. */
4244 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4245 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4246 		for (j = 0; j < rdev->usec_timeout; j++) {
4247 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4248 				break;
4249 			udelay(1);
4250 		}
4251 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4252 		WREG32(CP_HQD_PQ_RPTR, 0);
4253 		WREG32(CP_HQD_PQ_WPTR, 0);
4254 	}
4255 	cik_srbm_select(rdev, 0, 0, 0, 0);
4256 }
4257 
4258 /**
4259  * cik_cp_compute_enable - enable/disable the compute CP MEs
4260  *
4261  * @rdev: radeon_device pointer
4262  * @enable: enable or disable the MEs
4263  *
4264  * Halts or unhalts the compute MEs.
4265  */
4266 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4267 {
4268 	if (enable) {
4269 		WREG32(CP_MEC_CNTL, 0);
4270 	} else {
4271 		/*
4272 		 * To make hibernation reliable we need to clear compute ring
4273 		 * configuration before halting the compute ring.
4274 		 */
4275 		mutex_lock(&rdev->srbm_mutex);
4276 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4277 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4278 		mutex_unlock(&rdev->srbm_mutex);
4279 
4280 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4281 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4282 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4283 	}
4284 	udelay(50);
4285 }
4286 
4287 /**
4288  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4289  *
4290  * @rdev: radeon_device pointer
4291  *
4292  * Loads the compute MEC1&2 ucode.
4293  * Returns 0 for success, -EINVAL if the ucode is not available.
4294  */
4295 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4296 {
4297 	int i;
4298 
4299 	if (!rdev->mec_fw)
4300 		return -EINVAL;
4301 
4302 	cik_cp_compute_enable(rdev, false);
4303 
4304 	if (rdev->new_fw) {
4305 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4306 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4307 		const __le32 *fw_data;
4308 		u32 fw_size;
4309 
4310 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4311 
4312 		/* MEC1 */
4313 		fw_data = (const __le32 *)
4314 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4315 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4316 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4317 		for (i = 0; i < fw_size; i++)
4318 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4319 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4320 
4321 		/* MEC2 */
4322 		if (rdev->family == CHIP_KAVERI) {
4323 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4324 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4325 
4326 			fw_data = (const __le32 *)
4327 				(rdev->mec2_fw->data +
4328 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4329 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4330 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4331 			for (i = 0; i < fw_size; i++)
4332 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4333 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4334 		}
4335 	} else {
4336 		const __be32 *fw_data;
4337 
4338 		/* MEC1 */
4339 		fw_data = (const __be32 *)rdev->mec_fw->data;
4340 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4341 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4342 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4343 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4344 
4345 		if (rdev->family == CHIP_KAVERI) {
4346 			/* MEC2 */
4347 			fw_data = (const __be32 *)rdev->mec_fw->data;
4348 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4349 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4350 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4351 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4352 		}
4353 	}
4354 
4355 	return 0;
4356 }
4357 
4358 /**
4359  * cik_cp_compute_start - start the compute queues
4360  *
4361  * @rdev: radeon_device pointer
4362  *
4363  * Enable the compute queues.
4364  * Returns 0 for success, error for failure.
4365  */
4366 static int cik_cp_compute_start(struct radeon_device *rdev)
4367 {
4368 	cik_cp_compute_enable(rdev, true);
4369 
4370 	return 0;
4371 }
4372 
4373 /**
4374  * cik_cp_compute_fini - stop the compute queues
4375  *
4376  * @rdev: radeon_device pointer
4377  *
4378  * Stop the compute queues and tear down the driver queue
4379  * info.
4380  */
4381 static void cik_cp_compute_fini(struct radeon_device *rdev)
4382 {
4383 	int i, idx, r;
4384 
4385 	cik_cp_compute_enable(rdev, false);
4386 
4387 	for (i = 0; i < 2; i++) {
4388 		if (i == 0)
4389 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4390 		else
4391 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4392 
4393 		if (rdev->ring[idx].mqd_obj) {
4394 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4395 			if (unlikely(r != 0))
4396 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4397 
4398 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4399 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4400 
4401 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4402 			rdev->ring[idx].mqd_obj = NULL;
4403 		}
4404 	}
4405 }
4406 
4407 static void cik_mec_fini(struct radeon_device *rdev)
4408 {
4409 	int r;
4410 
4411 	if (rdev->mec.hpd_eop_obj) {
4412 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4413 		if (unlikely(r != 0))
4414 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4415 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4416 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4417 
4418 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4419 		rdev->mec.hpd_eop_obj = NULL;
4420 	}
4421 }
4422 
4423 #define MEC_HPD_SIZE 2048
4424 
4425 static int cik_mec_init(struct radeon_device *rdev)
4426 {
4427 	int r;
4428 	u32 *hpd;
4429 
4430 	/*
4431 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4432 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4433 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4434 	 * be handled by KFD
4435 	 */
4436 	rdev->mec.num_mec = 1;
4437 	rdev->mec.num_pipe = 1;
4438 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4439 
4440 	if (rdev->mec.hpd_eop_obj == NULL) {
4441 		r = radeon_bo_create(rdev,
4442 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4443 				     PAGE_SIZE, true,
4444 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4445 				     &rdev->mec.hpd_eop_obj);
4446 		if (r) {
4447 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4448 			return r;
4449 		}
4450 	}
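
	/* With num_mec = num_pipe = 1 as set above, the buffer created
	 * here is 1 * 1 * MEC_HPD_SIZE * 2 = 4096 bytes, a single page.
	 */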
4451 
4452 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4453 	if (unlikely(r != 0)) {
4454 		cik_mec_fini(rdev);
4455 		return r;
4456 	}
4457 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4458 			  &rdev->mec.hpd_eop_gpu_addr);
4459 	if (r) {
4460 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4461 		cik_mec_fini(rdev);
4462 		return r;
4463 	}
4464 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4465 	if (r) {
4466 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4467 		cik_mec_fini(rdev);
4468 		return r;
4469 	}
4470 
4471 	/* clear memory.  Not sure if this is required or not */
4472 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4473 
4474 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4475 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4476 
4477 	return 0;
4478 }
4479 
4480 struct hqd_registers
4481 {
4482 	u32 cp_mqd_base_addr;
4483 	u32 cp_mqd_base_addr_hi;
4484 	u32 cp_hqd_active;
4485 	u32 cp_hqd_vmid;
4486 	u32 cp_hqd_persistent_state;
4487 	u32 cp_hqd_pipe_priority;
4488 	u32 cp_hqd_queue_priority;
4489 	u32 cp_hqd_quantum;
4490 	u32 cp_hqd_pq_base;
4491 	u32 cp_hqd_pq_base_hi;
4492 	u32 cp_hqd_pq_rptr;
4493 	u32 cp_hqd_pq_rptr_report_addr;
4494 	u32 cp_hqd_pq_rptr_report_addr_hi;
4495 	u32 cp_hqd_pq_wptr_poll_addr;
4496 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4497 	u32 cp_hqd_pq_doorbell_control;
4498 	u32 cp_hqd_pq_wptr;
4499 	u32 cp_hqd_pq_control;
4500 	u32 cp_hqd_ib_base_addr;
4501 	u32 cp_hqd_ib_base_addr_hi;
4502 	u32 cp_hqd_ib_rptr;
4503 	u32 cp_hqd_ib_control;
4504 	u32 cp_hqd_iq_timer;
4505 	u32 cp_hqd_iq_rptr;
4506 	u32 cp_hqd_dequeue_request;
4507 	u32 cp_hqd_dma_offload;
4508 	u32 cp_hqd_sema_cmd;
4509 	u32 cp_hqd_msg_type;
4510 	u32 cp_hqd_atomic0_preop_lo;
4511 	u32 cp_hqd_atomic0_preop_hi;
4512 	u32 cp_hqd_atomic1_preop_lo;
4513 	u32 cp_hqd_atomic1_preop_hi;
4514 	u32 cp_hqd_hq_scheduler0;
4515 	u32 cp_hqd_hq_scheduler1;
4516 	u32 cp_mqd_control;
4517 };
4518 
4519 struct bonaire_mqd
4520 {
4521 	u32 header;
4522 	u32 dispatch_initiator;
4523 	u32 dimensions[3];
4524 	u32 start_idx[3];
4525 	u32 num_threads[3];
4526 	u32 pipeline_stat_enable;
4527 	u32 perf_counter_enable;
4528 	u32 pgm[2];
4529 	u32 tba[2];
4530 	u32 tma[2];
4531 	u32 pgm_rsrc[2];
4532 	u32 vmid;
4533 	u32 resource_limits;
4534 	u32 static_thread_mgmt01[2];
4535 	u32 tmp_ring_size;
4536 	u32 static_thread_mgmt23[2];
4537 	u32 restart[3];
4538 	u32 thread_trace_enable;
4539 	u32 reserved1;
4540 	u32 user_data[16];
4541 	u32 vgtcs_invoke_count[2];
4542 	struct hqd_registers queue_state;
4543 	u32 dequeue_cntr;
4544 	u32 interrupt_queue[64];
4545 };
4546 
4547 /**
4548  * cik_cp_compute_resume - setup the compute queue registers
4549  *
4550  * @rdev: radeon_device pointer
4551  *
4552  * Program the compute queues and test them to make sure they
4553  * are working.
4554  * Returns 0 for success, error for failure.
4555  */
4556 static int cik_cp_compute_resume(struct radeon_device *rdev)
4557 {
4558 	int r, i, j, idx;
4559 	u32 tmp;
4560 	bool use_doorbell = true;
4561 	u64 hqd_gpu_addr;
4562 	u64 mqd_gpu_addr;
4563 	u64 eop_gpu_addr;
4564 	u64 wb_gpu_addr;
4565 	u32 *buf;
4566 	struct bonaire_mqd *mqd;
4567 
4568 	r = cik_cp_compute_start(rdev);
4569 	if (r)
4570 		return r;
4571 
4572 	/* fix up chicken bits */
4573 	tmp = RREG32(CP_CPF_DEBUG);
4574 	tmp |= (1 << 23);
4575 	WREG32(CP_CPF_DEBUG, tmp);
4576 
4577 	/* init the pipes */
4578 	mutex_lock(&rdev->srbm_mutex);
4579 
4580 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4581 
4582 	cik_srbm_select(rdev, 0, 0, 0, 0);
4583 
4584 	/* write the EOP addr */
4585 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4586 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4587 
4588 	/* set the VMID assigned */
4589 	WREG32(CP_HPD_EOP_VMID, 0);
4590 
4591 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4592 	tmp = RREG32(CP_HPD_EOP_CONTROL);
4593 	tmp &= ~EOP_SIZE_MASK;
4594 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4595 	WREG32(CP_HPD_EOP_CONTROL, tmp);
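	/* Sanity check: MEC_HPD_SIZE = 2048 bytes = 256 qwords, so the
	 * field is order_base_2(256) = 8 and the hardware sees
	 * 2^(8+1) = 512 dwords = 2048 bytes, matching the buffer size.
	 */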
4596 
4597 	mutex_unlock(&rdev->srbm_mutex);
4598 
4599 	/* init the queues.  Just two for now. */
4600 	for (i = 0; i < 2; i++) {
4601 		if (i == 0)
4602 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4603 		else
4604 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4605 
4606 		if (rdev->ring[idx].mqd_obj == NULL) {
4607 			r = radeon_bo_create(rdev,
4608 					     sizeof(struct bonaire_mqd),
4609 					     PAGE_SIZE, true,
4610 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4611 					     NULL, &rdev->ring[idx].mqd_obj);
4612 			if (r) {
4613 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4614 				return r;
4615 			}
4616 		}
4617 
4618 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4619 		if (unlikely(r != 0)) {
4620 			cik_cp_compute_fini(rdev);
4621 			return r;
4622 		}
4623 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4624 				  &mqd_gpu_addr);
4625 		if (r) {
4626 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4627 			cik_cp_compute_fini(rdev);
4628 			return r;
4629 		}
4630 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4631 		if (r) {
4632 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4633 			cik_cp_compute_fini(rdev);
4634 			return r;
4635 		}
4636 
4637 		/* init the mqd struct */
4638 		memset(buf, 0, sizeof(struct bonaire_mqd));
4639 
4640 		mqd = (struct bonaire_mqd *)buf;
4641 		mqd->header = 0xC0310800;
4642 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4643 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4644 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4645 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4646 
4647 		mutex_lock(&rdev->srbm_mutex);
4648 		cik_srbm_select(rdev, rdev->ring[idx].me,
4649 				rdev->ring[idx].pipe,
4650 				rdev->ring[idx].queue, 0);
4651 
4652 		/* disable wptr polling */
4653 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4654 		tmp &= ~WPTR_POLL_EN;
4655 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4656 
4657 		/* enable doorbell? */
4658 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4659 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4660 		if (use_doorbell)
4661 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4662 		else
4663 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4664 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4665 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4666 
4667 		/* disable the queue if it's active */
4668 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4669 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4670 		mqd->queue_state.cp_hqd_pq_wptr = 0;
4671 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4672 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4673 			for (j = 0; j < rdev->usec_timeout; j++) {
4674 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4675 					break;
4676 				udelay(1);
4677 			}
4678 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4679 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4680 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4681 		}
4682 
4683 		/* set the pointer to the MQD */
4684 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4685 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4686 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4687 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4688 		/* set MQD vmid to 0 */
4689 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4690 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4691 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4692 
4693 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4694 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4695 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4696 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4697 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4698 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4699 
4700 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4701 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4702 		mqd->queue_state.cp_hqd_pq_control &=
4703 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4704 
4705 		mqd->queue_state.cp_hqd_pq_control |=
4706 			order_base_2(rdev->ring[idx].ring_size / 8);
4707 		mqd->queue_state.cp_hqd_pq_control |=
4708 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4709 #ifdef __BIG_ENDIAN
4710 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4711 #endif
4712 		mqd->queue_state.cp_hqd_pq_control &=
4713 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4714 		mqd->queue_state.cp_hqd_pq_control |=
4715 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4716 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4717 
4718 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4719 		if (i == 0)
4720 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4721 		else
4722 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4723 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4724 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4725 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4726 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4727 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4728 
4729 		/* set the wb address whether it's enabled or not */
4730 		if (i == 0)
4731 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4732 		else
4733 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4734 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4735 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4736 			upper_32_bits(wb_gpu_addr) & 0xffff;
4737 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4738 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4739 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4740 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4741 
4742 		/* enable the doorbell if requested */
4743 		if (use_doorbell) {
4744 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4745 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4746 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4747 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4748 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4749 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4750 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4751 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4752 
4753 		} else {
4754 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4755 		}
4756 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4757 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4758 
4759 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4760 		rdev->ring[idx].wptr = 0;
4761 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4762 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4763 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4764 
4765 		/* set the vmid for the queue */
4766 		mqd->queue_state.cp_hqd_vmid = 0;
4767 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4768 
4769 		/* activate the queue */
4770 		mqd->queue_state.cp_hqd_active = 1;
4771 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4772 
4773 		cik_srbm_select(rdev, 0, 0, 0, 0);
4774 		mutex_unlock(&rdev->srbm_mutex);
4775 
4776 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4777 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4778 
4779 		rdev->ring[idx].ready = true;
4780 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4781 		if (r)
4782 			rdev->ring[idx].ready = false;
4783 	}
4784 
4785 	return 0;
4786 }
4787 
4788 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4789 {
4790 	cik_cp_gfx_enable(rdev, enable);
4791 	cik_cp_compute_enable(rdev, enable);
4792 }
4793 
4794 static int cik_cp_load_microcode(struct radeon_device *rdev)
4795 {
4796 	int r;
4797 
4798 	r = cik_cp_gfx_load_microcode(rdev);
4799 	if (r)
4800 		return r;
4801 	r = cik_cp_compute_load_microcode(rdev);
4802 	if (r)
4803 		return r;
4804 
4805 	return 0;
4806 }
4807 
4808 static void cik_cp_fini(struct radeon_device *rdev)
4809 {
4810 	cik_cp_gfx_fini(rdev);
4811 	cik_cp_compute_fini(rdev);
4812 }
4813 
4814 static int cik_cp_resume(struct radeon_device *rdev)
4815 {
4816 	int r;
4817 
4818 	cik_enable_gui_idle_interrupt(rdev, false);
4819 
4820 	r = cik_cp_load_microcode(rdev);
4821 	if (r)
4822 		return r;
4823 
4824 	r = cik_cp_gfx_resume(rdev);
4825 	if (r)
4826 		return r;
4827 	r = cik_cp_compute_resume(rdev);
4828 	if (r)
4829 		return r;
4830 
4831 	cik_enable_gui_idle_interrupt(rdev, true);
4832 
4833 	return 0;
4834 }
4835 
4836 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4837 {
4838 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4839 		RREG32(GRBM_STATUS));
4840 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4841 		RREG32(GRBM_STATUS2));
4842 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4843 		RREG32(GRBM_STATUS_SE0));
4844 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4845 		RREG32(GRBM_STATUS_SE1));
4846 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4847 		RREG32(GRBM_STATUS_SE2));
4848 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4849 		RREG32(GRBM_STATUS_SE3));
4850 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4851 		RREG32(SRBM_STATUS));
4852 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4853 		RREG32(SRBM_STATUS2));
4854 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4855 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4856 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4857 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4858 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4859 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4860 		 RREG32(CP_STALLED_STAT1));
4861 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4862 		 RREG32(CP_STALLED_STAT2));
4863 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4864 		 RREG32(CP_STALLED_STAT3));
4865 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4866 		 RREG32(CP_CPF_BUSY_STAT));
4867 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4868 		 RREG32(CP_CPF_STALLED_STAT1));
4869 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4870 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4871 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4872 		 RREG32(CP_CPC_STALLED_STAT1));
4873 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4874 }
4875 
4876 /**
4877  * cik_gpu_check_soft_reset - check which blocks are busy
4878  *
4879  * @rdev: radeon_device pointer
4880  *
4881  * Check which blocks are busy and return the relevant reset
4882  * mask to be used by cik_gpu_soft_reset().
4883  * Returns a mask of the blocks to be reset.
4884  */
4885 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4886 {
4887 	u32 reset_mask = 0;
4888 	u32 tmp;
4889 
4890 	/* GRBM_STATUS */
4891 	tmp = RREG32(GRBM_STATUS);
4892 	if (tmp & (PA_BUSY | SC_BUSY |
4893 		   BCI_BUSY | SX_BUSY |
4894 		   TA_BUSY | VGT_BUSY |
4895 		   DB_BUSY | CB_BUSY |
4896 		   GDS_BUSY | SPI_BUSY |
4897 		   IA_BUSY | IA_BUSY_NO_DMA))
4898 		reset_mask |= RADEON_RESET_GFX;
4899 
4900 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4901 		reset_mask |= RADEON_RESET_CP;
4902 
4903 	/* GRBM_STATUS2 */
4904 	tmp = RREG32(GRBM_STATUS2);
4905 	if (tmp & RLC_BUSY)
4906 		reset_mask |= RADEON_RESET_RLC;
4907 
4908 	/* SDMA0_STATUS_REG */
4909 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4910 	if (!(tmp & SDMA_IDLE))
4911 		reset_mask |= RADEON_RESET_DMA;
4912 
4913 	/* SDMA1_STATUS_REG */
4914 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4915 	if (!(tmp & SDMA_IDLE))
4916 		reset_mask |= RADEON_RESET_DMA1;
4917 
4918 	/* SRBM_STATUS2 */
4919 	tmp = RREG32(SRBM_STATUS2);
4920 	if (tmp & SDMA_BUSY)
4921 		reset_mask |= RADEON_RESET_DMA;
4922 
4923 	if (tmp & SDMA1_BUSY)
4924 		reset_mask |= RADEON_RESET_DMA1;
4925 
4926 	/* SRBM_STATUS */
4927 	tmp = RREG32(SRBM_STATUS);
4928 
4929 	if (tmp & IH_BUSY)
4930 		reset_mask |= RADEON_RESET_IH;
4931 
4932 	if (tmp & SEM_BUSY)
4933 		reset_mask |= RADEON_RESET_SEM;
4934 
4935 	if (tmp & GRBM_RQ_PENDING)
4936 		reset_mask |= RADEON_RESET_GRBM;
4937 
4938 	if (tmp & VMC_BUSY)
4939 		reset_mask |= RADEON_RESET_VMC;
4940 
4941 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4942 		   MCC_BUSY | MCD_BUSY))
4943 		reset_mask |= RADEON_RESET_MC;
4944 
4945 	if (evergreen_is_display_hung(rdev))
4946 		reset_mask |= RADEON_RESET_DISPLAY;
4947 
4948 	/* Skip MC reset as it's most likely not hung, just busy */
4949 	if (reset_mask & RADEON_RESET_MC) {
4950 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4951 		reset_mask &= ~RADEON_RESET_MC;
4952 	}
4953 
4954 	return reset_mask;
4955 }
4956 
4957 /**
4958  * cik_gpu_soft_reset - soft reset GPU
4959  *
4960  * @rdev: radeon_device pointer
4961  * @reset_mask: mask of which blocks to reset
4962  *
4963  * Soft reset the blocks specified in @reset_mask.
4964  */
4965 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4966 {
4967 	struct evergreen_mc_save save;
4968 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4969 	u32 tmp;
4970 
4971 	if (reset_mask == 0)
4972 		return;
4973 
4974 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4975 
4976 	cik_print_gpu_status_regs(rdev);
4977 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4978 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4979 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4980 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4981 
4982 	/* disable CG/PG */
4983 	cik_fini_pg(rdev);
4984 	cik_fini_cg(rdev);
4985 
4986 	/* stop the rlc */
4987 	cik_rlc_stop(rdev);
4988 
4989 	/* Disable GFX parsing/prefetching */
4990 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4991 
4992 	/* Disable MEC parsing/prefetching */
4993 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4994 
4995 	if (reset_mask & RADEON_RESET_DMA) {
4996 		/* sdma0 */
4997 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4998 		tmp |= SDMA_HALT;
4999 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5000 	}
5001 	if (reset_mask & RADEON_RESET_DMA1) {
5002 		/* sdma1 */
5003 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5004 		tmp |= SDMA_HALT;
5005 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5006 	}
5007 
5008 	evergreen_mc_stop(rdev, &save);
5009 	if (evergreen_mc_wait_for_idle(rdev)) {
5010 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5011 	}
5012 
5013 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5014 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5015 
5016 	if (reset_mask & RADEON_RESET_CP) {
5017 		grbm_soft_reset |= SOFT_RESET_CP;
5018 
5019 		srbm_soft_reset |= SOFT_RESET_GRBM;
5020 	}
5021 
5022 	if (reset_mask & RADEON_RESET_DMA)
5023 		srbm_soft_reset |= SOFT_RESET_SDMA;
5024 
5025 	if (reset_mask & RADEON_RESET_DMA1)
5026 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5027 
5028 	if (reset_mask & RADEON_RESET_DISPLAY)
5029 		srbm_soft_reset |= SOFT_RESET_DC;
5030 
5031 	if (reset_mask & RADEON_RESET_RLC)
5032 		grbm_soft_reset |= SOFT_RESET_RLC;
5033 
5034 	if (reset_mask & RADEON_RESET_SEM)
5035 		srbm_soft_reset |= SOFT_RESET_SEM;
5036 
5037 	if (reset_mask & RADEON_RESET_IH)
5038 		srbm_soft_reset |= SOFT_RESET_IH;
5039 
5040 	if (reset_mask & RADEON_RESET_GRBM)
5041 		srbm_soft_reset |= SOFT_RESET_GRBM;
5042 
5043 	if (reset_mask & RADEON_RESET_VMC)
5044 		srbm_soft_reset |= SOFT_RESET_VMC;
5045 
5046 	if (!(rdev->flags & RADEON_IS_IGP)) {
5047 		if (reset_mask & RADEON_RESET_MC)
5048 			srbm_soft_reset |= SOFT_RESET_MC;
5049 	}
5050 
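	/* assert the requested reset bits, give the hardware ~50us, then
	 * deassert; the read-backs after each write are presumably posting
	 * reads to flush the register update before the delay
	 */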
5051 	if (grbm_soft_reset) {
5052 		tmp = RREG32(GRBM_SOFT_RESET);
5053 		tmp |= grbm_soft_reset;
5054 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5055 		WREG32(GRBM_SOFT_RESET, tmp);
5056 		tmp = RREG32(GRBM_SOFT_RESET);
5057 
5058 		udelay(50);
5059 
5060 		tmp &= ~grbm_soft_reset;
5061 		WREG32(GRBM_SOFT_RESET, tmp);
5062 		tmp = RREG32(GRBM_SOFT_RESET);
5063 	}
5064 
5065 	if (srbm_soft_reset) {
5066 		tmp = RREG32(SRBM_SOFT_RESET);
5067 		tmp |= srbm_soft_reset;
5068 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5069 		WREG32(SRBM_SOFT_RESET, tmp);
5070 		tmp = RREG32(SRBM_SOFT_RESET);
5071 
5072 		udelay(50);
5073 
5074 		tmp &= ~srbm_soft_reset;
5075 		WREG32(SRBM_SOFT_RESET, tmp);
5076 		tmp = RREG32(SRBM_SOFT_RESET);
5077 	}
5078 
5079 	/* Wait a little for things to settle down */
5080 	udelay(50);
5081 
5082 	evergreen_mc_resume(rdev, &save);
5083 	udelay(50);
5084 
5085 	cik_print_gpu_status_regs(rdev);
5086 }
5087 
5088 struct kv_reset_save_regs {
5089 	u32 gmcon_reng_execute;
5090 	u32 gmcon_misc;
5091 	u32 gmcon_misc3;
5092 };
5093 
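/* Kaveri only: save the GMCON register-engine controls and stop its
 * automatic execution around a PCI config reset; the restore side replays
 * a fixed PGFSM programming sequence before re-enabling them.
 */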
5094 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5095 				   struct kv_reset_save_regs *save)
5096 {
5097 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5098 	save->gmcon_misc = RREG32(GMCON_MISC);
5099 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5100 
5101 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5102 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5103 						STCTRL_STUTTER_EN));
5104 }
5105 
5106 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5107 				      struct kv_reset_save_regs *save)
5108 {
5109 	int i;
5110 
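	/* replay the GMCON power-gating FSM state: each GMCON_PGFSM_CONFIG
	 * value appears to select a different power island, the preceding
	 * GMCON_PGFSM_WRITE supplies its data, and the runs of five zero
	 * writes pad the sequence; the magic numbers are hardware-defined
	 */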
5111 	WREG32(GMCON_PGFSM_WRITE, 0);
5112 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5113 
5114 	for (i = 0; i < 5; i++)
5115 		WREG32(GMCON_PGFSM_WRITE, 0);
5116 
5117 	WREG32(GMCON_PGFSM_WRITE, 0);
5118 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5119 
5120 	for (i = 0; i < 5; i++)
5121 		WREG32(GMCON_PGFSM_WRITE, 0);
5122 
5123 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5124 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5125 
5126 	for (i = 0; i < 5; i++)
5127 		WREG32(GMCON_PGFSM_WRITE, 0);
5128 
5129 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5130 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5131 
5132 	for (i = 0; i < 5; i++)
5133 		WREG32(GMCON_PGFSM_WRITE, 0);
5134 
5135 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5136 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5137 
5138 	for (i = 0; i < 5; i++)
5139 		WREG32(GMCON_PGFSM_WRITE, 0);
5140 
5141 	WREG32(GMCON_PGFSM_WRITE, 0);
5142 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5143 
5144 	for (i = 0; i < 5; i++)
5145 		WREG32(GMCON_PGFSM_WRITE, 0);
5146 
5147 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5148 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5149 
5150 	for (i = 0; i < 5; i++)
5151 		WREG32(GMCON_PGFSM_WRITE, 0);
5152 
5153 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5154 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5155 
5156 	for (i = 0; i < 5; i++)
5157 		WREG32(GMCON_PGFSM_WRITE, 0);
5158 
5159 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5160 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5161 
5162 	for (i = 0; i < 5; i++)
5163 		WREG32(GMCON_PGFSM_WRITE, 0);
5164 
5165 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5166 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5167 
5168 	for (i = 0; i < 5; i++)
5169 		WREG32(GMCON_PGFSM_WRITE, 0);
5170 
5171 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5172 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5173 
5174 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5175 	WREG32(GMCON_MISC, save->gmcon_misc);
5176 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5177 }
5178 
5179 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5180 {
5181 	struct evergreen_mc_save save;
5182 	struct kv_reset_save_regs kv_save = { 0 };
5183 	u32 tmp, i;
5184 
5185 	dev_info(rdev->dev, "GPU pci config reset\n");
5186 
5187 	/* disable dpm? */
5188 
5189 	/* disable cg/pg */
5190 	cik_fini_pg(rdev);
5191 	cik_fini_cg(rdev);
5192 
5193 	/* Disable GFX parsing/prefetching */
5194 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5195 
5196 	/* Disable MEC parsing/prefetching */
5197 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5198 
5199 	/* sdma0 */
5200 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5201 	tmp |= SDMA_HALT;
5202 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5203 	/* sdma1 */
5204 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5205 	tmp |= SDMA_HALT;
5206 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5207 	/* XXX other engines? */
5208 
5209 	/* halt the rlc, disable cp internal ints */
5210 	cik_rlc_stop(rdev);
5211 
5212 	udelay(50);
5213 
5214 	/* disable mem access */
5215 	evergreen_mc_stop(rdev, &save);
5216 	if (evergreen_mc_wait_for_idle(rdev)) {
5217 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5218 	}
5219 
5220 	if (rdev->flags & RADEON_IS_IGP)
5221 		kv_save_regs_for_reset(rdev, &kv_save);
5222 
5223 	/* disable BM */
5224 	pci_clear_master(rdev->pdev);
5225 	/* reset */
5226 	radeon_pci_config_reset(rdev);
5227 
5228 	udelay(100);
5229 
5230 	/* wait for asic to come out of reset */
5231 	for (i = 0; i < rdev->usec_timeout; i++) {
5232 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5233 			break;
5234 		udelay(1);
5235 	}
5236 
5237 	/* does asic init need to be run first??? */
5238 	if (rdev->flags & RADEON_IS_IGP)
5239 		kv_restore_regs_for_reset(rdev, &kv_save);
5240 }
5241 
5242 /**
5243  * cik_asic_reset - soft reset GPU
5244  *
5245  * @rdev: radeon_device pointer
5246  * @hard: force hard reset
5247  *
5248  * Look up which blocks are hung and attempt
5249  * to reset them.
5250  * Returns 0 for success.
5251  */
5252 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5253 {
5254 	u32 reset_mask;
5255 
5256 	if (hard) {
5257 		cik_gpu_pci_config_reset(rdev);
5258 		return 0;
5259 	}
5260 
5261 	reset_mask = cik_gpu_check_soft_reset(rdev);
5262 
5263 	if (reset_mask)
5264 		r600_set_bios_scratch_engine_hung(rdev, true);
5265 
5266 	/* try soft reset */
5267 	cik_gpu_soft_reset(rdev, reset_mask);
5268 
5269 	reset_mask = cik_gpu_check_soft_reset(rdev);
5270 
5271 	/* try pci config reset */
5272 	if (reset_mask && radeon_hard_reset)
5273 		cik_gpu_pci_config_reset(rdev);
5274 
5275 	reset_mask = cik_gpu_check_soft_reset(rdev);
5276 
5277 	if (!reset_mask)
5278 		r600_set_bios_scratch_engine_hung(rdev, false);
5279 
5280 	return 0;
5281 }
5282 
5283 /**
5284  * cik_gfx_is_lockup - check if the 3D engine is locked up
5285  *
5286  * @rdev: radeon_device pointer
5287  * @ring: radeon_ring structure holding ring information
5288  *
5289  * Check if the 3D engine is locked up (CIK).
5290  * Returns true if the engine is locked, false if not.
5291  */
5292 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5293 {
5294 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5295 
5296 	if (!(reset_mask & (RADEON_RESET_GFX |
5297 			    RADEON_RESET_COMPUTE |
5298 			    RADEON_RESET_CP))) {
5299 		radeon_ring_lockup_update(rdev, ring);
5300 		return false;
5301 	}
5302 	return radeon_ring_test_lockup(rdev, ring);
5303 }
5304 
5305 /* MC */
5306 /**
5307  * cik_mc_program - program the GPU memory controller
5308  *
5309  * @rdev: radeon_device pointer
5310  *
5311  * Set the location of vram, gart, and AGP in the GPU's
5312  * physical address space (CIK).
5313  */
5314 static void cik_mc_program(struct radeon_device *rdev)
5315 {
5316 	struct evergreen_mc_save save;
5317 	u32 tmp;
5318 	int i, j;
5319 
5320 	/* Initialize HDP */
5321 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5322 		WREG32((0x2c14 + j), 0x00000000);
5323 		WREG32((0x2c18 + j), 0x00000000);
5324 		WREG32((0x2c1c + j), 0x00000000);
5325 		WREG32((0x2c20 + j), 0x00000000);
5326 		WREG32((0x2c24 + j), 0x00000000);
5327 	}
5328 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5329 
5330 	evergreen_mc_stop(rdev, &save);
5331 	if (radeon_mc_wait_for_idle(rdev)) {
5332 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5333 	}
5334 	/* Lockout access through VGA aperture*/
5335 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5336 	/* Update configuration */
5337 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5338 	       rdev->mc.vram_start >> 12);
5339 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5340 	       rdev->mc.vram_end >> 12);
5341 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5342 	       rdev->vram_scratch.gpu_addr >> 12);
5343 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5344 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5345 	WREG32(MC_VM_FB_LOCATION, tmp);
5346 	/* XXX double check these! */
5347 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5348 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5349 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5350 	WREG32(MC_VM_AGP_BASE, 0);
5351 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5352 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5353 	if (radeon_mc_wait_for_idle(rdev)) {
5354 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5355 	}
5356 	evergreen_mc_resume(rdev, &save);
5357 	/* we need to own VRAM, so turn off the VGA renderer here
5358 	 * to stop it from overwriting our objects */
5359 	rv515_vga_render_disable(rdev);
5360 }
5361 
5362 /**
5363  * cik_mc_init - initialize the memory controller driver params
5364  *
5365  * @rdev: radeon_device pointer
5366  *
5367  * Look up the amount of vram, vram width, and decide how to place
5368  * vram and gart within the GPU's physical address space (CIK).
5369  * Returns 0 for success.
5370  */
5371 static int cik_mc_init(struct radeon_device *rdev)
5372 {
5373 	u32 tmp;
5374 	int chansize, numchan;
5375 
5376 	/* Get VRAM information */
5377 	rdev->mc.vram_is_ddr = true;
5378 	tmp = RREG32(MC_ARB_RAMCFG);
5379 	if (tmp & CHANSIZE_MASK) {
5380 		chansize = 64;
5381 	} else {
5382 		chansize = 32;
5383 	}
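	/* NOOFCHAN is an encoded field, not a raw count; map it to the
	 * actual number of memory channels
	 */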
5384 	tmp = RREG32(MC_SHARED_CHMAP);
5385 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5386 	case 0:
5387 	default:
5388 		numchan = 1;
5389 		break;
5390 	case 1:
5391 		numchan = 2;
5392 		break;
5393 	case 2:
5394 		numchan = 4;
5395 		break;
5396 	case 3:
5397 		numchan = 8;
5398 		break;
5399 	case 4:
5400 		numchan = 3;
5401 		break;
5402 	case 5:
5403 		numchan = 6;
5404 		break;
5405 	case 6:
5406 		numchan = 10;
5407 		break;
5408 	case 7:
5409 		numchan = 12;
5410 		break;
5411 	case 8:
5412 		numchan = 16;
5413 		break;
5414 	}
5415 	rdev->mc.vram_width = numchan * chansize;
5416 	/* Could the aperture size report 0? */
5417 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5418 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5419 	/* size in MB on CIK */
5420 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5421 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5422 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5423 	si_vram_gtt_location(rdev, &rdev->mc);
5424 	radeon_update_bandwidth_info(rdev);
5425 
5426 	return 0;
5427 }
5428 
5429 /*
5430  * GART
5431  * VMID 0 is the physical GPU addresses as used by the kernel.
5432  * VMIDs 1-15 are used for userspace clients and are handled
5433  * by the radeon vm/hsa code.
5434  */
5435 /**
5436  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5437  *
5438  * @rdev: radeon_device pointer
5439  *
5440  * Flush the TLB for the VMID 0 page table (CIK).
5441  */
5442 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5443 {
5444 	/* flush hdp cache */
5445 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5446 
5447 	/* bits 0-15 are the VM contexts 0-15 */
5448 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5449 }
5450 
5451 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5452 {
5453 	int i;
5454 	uint32_t sh_mem_bases, sh_mem_config;
5455 
5456 	sh_mem_bases = 0x6000 | 0x6000 << 16;
5457 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5458 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5459 
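	/* VMIDs 8-15 are reserved for amdkfd compute clients (see
	 * cik_vm_init()); program each of them with the SH_MEM_BASES/CONFIG
	 * values set up above
	 */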
5460 	mutex_lock(&rdev->srbm_mutex);
5461 	for (i = 8; i < 16; i++) {
5462 		cik_srbm_select(rdev, 0, 0, 0, i);
5463 		/* CP and shaders */
5464 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5465 		WREG32(SH_MEM_APE1_BASE, 1);
5466 		WREG32(SH_MEM_APE1_LIMIT, 0);
5467 		WREG32(SH_MEM_BASES, sh_mem_bases);
5468 	}
5469 	cik_srbm_select(rdev, 0, 0, 0, 0);
5470 	mutex_unlock(&rdev->srbm_mutex);
5471 }
5472 
5473 /**
5474  * cik_pcie_gart_enable - gart enable
5475  *
5476  * @rdev: radeon_device pointer
5477  *
5478  * This sets up the TLBs, programs the page tables for VMID0,
5479  * sets up the hw for VMIDs 1-15 which are allocated on
5480  * demand, and sets up the global locations for the LDS, GDS,
5481  * and GPUVM for FSA64 clients (CIK).
5482  * Returns 0 for success, errors for failure.
5483  */
5484 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5485 {
5486 	int r, i;
5487 
5488 	if (rdev->gart.robj == NULL) {
5489 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5490 		return -EINVAL;
5491 	}
5492 	r = radeon_gart_table_vram_pin(rdev);
5493 	if (r)
5494 		return r;
5495 	/* Setup TLB control */
5496 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5497 	       (0xA << 7) |
5498 	       ENABLE_L1_TLB |
5499 	       ENABLE_L1_FRAGMENT_PROCESSING |
5500 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5501 	       ENABLE_ADVANCED_DRIVER_MODEL |
5502 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5503 	/* Setup L2 cache */
5504 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5505 	       ENABLE_L2_FRAGMENT_PROCESSING |
5506 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5507 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5508 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5509 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5510 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5511 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5512 	       BANK_SELECT(4) |
5513 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5514 	/* setup context0 */
5515 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5516 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5517 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5518 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5519 			(u32)(rdev->dummy_page.addr >> 12));
5520 	WREG32(VM_CONTEXT0_CNTL2, 0);
5521 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5522 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5523 
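	/* clear three undocumented VM registers; carried over from the SI
	 * GART setup, purpose unknown
	 */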
5524 	WREG32(0x15D4, 0);
5525 	WREG32(0x15D8, 0);
5526 	WREG32(0x15DC, 0);
5527 
5528 	/* restore contexts 1-15 */
5529 	/* set vm size, must be a multiple of 4 */
5530 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5531 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5532 	for (i = 1; i < 16; i++) {
5533 		if (i < 8)
5534 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5535 			       rdev->vm_manager.saved_table_addr[i]);
5536 		else
5537 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5538 			       rdev->vm_manager.saved_table_addr[i]);
5539 	}
5540 
5541 	/* enable contexts 1-15 */
5542 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5543 	       (u32)(rdev->dummy_page.addr >> 12));
5544 	WREG32(VM_CONTEXT1_CNTL2, 4);
5545 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5546 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5547 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5548 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5549 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5550 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5551 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5552 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5553 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5554 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5555 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5556 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5557 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5558 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5559 
5560 	if (rdev->family == CHIP_KAVERI) {
5561 		u32 tmp = RREG32(CHUB_CONTROL);
5562 		tmp &= ~BYPASS_VM;
5563 		WREG32(CHUB_CONTROL, tmp);
5564 	}
5565 
5566 	/* XXX SH_MEM regs */
5567 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5568 	mutex_lock(&rdev->srbm_mutex);
5569 	for (i = 0; i < 16; i++) {
5570 		cik_srbm_select(rdev, 0, 0, 0, i);
5571 		/* CP and shaders */
5572 		WREG32(SH_MEM_CONFIG, 0);
5573 		WREG32(SH_MEM_APE1_BASE, 1);
5574 		WREG32(SH_MEM_APE1_LIMIT, 0);
5575 		WREG32(SH_MEM_BASES, 0);
5576 		/* SDMA GFX */
5577 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5578 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5579 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5580 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5581 		/* XXX SDMA RLC - todo */
5582 	}
5583 	cik_srbm_select(rdev, 0, 0, 0, 0);
5584 	mutex_unlock(&rdev->srbm_mutex);
5585 
5586 	cik_pcie_init_compute_vmid(rdev);
5587 
5588 	cik_pcie_gart_tlb_flush(rdev);
5589 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5590 		 (unsigned)(rdev->mc.gtt_size >> 20),
5591 		 (unsigned long long)rdev->gart.table_addr);
5592 	rdev->gart.ready = true;
5593 	return 0;
5594 }
5595 
5596 /**
5597  * cik_pcie_gart_disable - gart disable
5598  *
5599  * @rdev: radeon_device pointer
5600  *
5601  * This disables all VM page tables (CIK).
5602  */
5603 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5604 {
5605 	unsigned i;
5606 
5607 	for (i = 1; i < 16; ++i) {
5608 		uint32_t reg;
5609 		if (i < 8)
5610 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5611 		else
5612 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5613 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5614 	}
5615 
5616 	/* Disable all tables */
5617 	WREG32(VM_CONTEXT0_CNTL, 0);
5618 	WREG32(VM_CONTEXT1_CNTL, 0);
5619 	/* Setup TLB control */
5620 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5621 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5622 	/* Setup L2 cache */
5623 	WREG32(VM_L2_CNTL,
5624 	       ENABLE_L2_FRAGMENT_PROCESSING |
5625 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5626 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5627 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5628 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5629 	WREG32(VM_L2_CNTL2, 0);
5630 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5631 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5632 	radeon_gart_table_vram_unpin(rdev);
5633 }
5634 
5635 /**
5636  * cik_pcie_gart_fini - vm fini callback
5637  *
5638  * @rdev: radeon_device pointer
5639  *
5640  * Tears down the driver GART/VM setup (CIK).
5641  */
5642 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5643 {
5644 	cik_pcie_gart_disable(rdev);
5645 	radeon_gart_table_vram_free(rdev);
5646 	radeon_gart_fini(rdev);
5647 }
5648 
5649 /* vm parser */
5650 /**
5651  * cik_ib_parse - vm ib_parse callback
5652  *
5653  * @rdev: radeon_device pointer
5654  * @ib: indirect buffer pointer
5655  *
5656  * CIK uses hw IB checking so this is a nop (CIK).
5657  */
5658 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5659 {
5660 	return 0;
5661 }
5662 
5663 /*
5664  * vm
5665  * VMID 0 is the physical GPU addresses as used by the kernel.
5666  * VMIDs 1-15 are used for userspace clients and are handled
5667  * by the radeon vm/hsa code.
5668  */
5669 /**
5670  * cik_vm_init - cik vm init callback
5671  *
5672  * @rdev: radeon_device pointer
5673  *
5674  * Inits cik specific vm parameters (number of VMs, base of vram for
5675  * VMIDs 1-15) (CIK).
5676  * Returns 0 for success.
5677  */
5678 int cik_vm_init(struct radeon_device *rdev)
5679 {
5680 	/*
5681 	 * number of VMs
5682 	 * VMID 0 is reserved for System
5683 	 * radeon graphics/compute will use VMIDs 1-7
5684 	 * amdkfd will use VMIDs 8-15
5685 	 */
5686 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5687 	/* base offset of vram pages */
5688 	if (rdev->flags & RADEON_IS_IGP) {
5689 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5690 		tmp <<= 22;
5691 		rdev->vm_manager.vram_base_offset = tmp;
5692 	} else
5693 		rdev->vm_manager.vram_base_offset = 0;
5694 
5695 	return 0;
5696 }
5697 
5698 /**
5699  * cik_vm_fini - cik vm fini callback
5700  *
5701  * @rdev: radeon_device pointer
5702  *
5703  * Tear down any asic specific VM setup (CIK).
5704  */
5705 void cik_vm_fini(struct radeon_device *rdev)
5706 {
5707 }
5708 
5709 /**
5710  * cik_vm_decode_fault - print human readable fault info
5711  *
5712  * @rdev: radeon_device pointer
5713  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5714  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5715  *
5716  * Print human readable fault information (CIK).
5717  */
5718 static void cik_vm_decode_fault(struct radeon_device *rdev,
5719 				u32 status, u32 addr, u32 mc_client)
5720 {
5721 	u32 mc_id;
5722 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5723 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
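	/* mc_client is a packed four-character block name; unpack it into a
	 * NUL-terminated string for the fault message
	 */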
5724 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5725 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5726 
5727 	if (rdev->family == CHIP_HAWAII)
5728 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5729 	else
5730 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5731 
5732 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5733 	       protections, vmid, addr,
5734 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5735 	       block, mc_client, mc_id);
5736 }
5737 
5738 /**
5739  * cik_vm_flush - cik vm flush using the CP
5740  *
5741  * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID to flush
 * @pd_addr: page directory base address
5742  *
5743  * Update the page table base and flush the VM TLB
5744  * using the CP (CIK).
5745  */
5746 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5747 		  unsigned vm_id, uint64_t pd_addr)
5748 {
5749 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5750 
5751 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5752 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5753 				 WRITE_DATA_DST_SEL(0)));
5754 	if (vm_id < 8) {
5755 		radeon_ring_write(ring,
5756 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5757 	} else {
5758 		radeon_ring_write(ring,
5759 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5760 	}
5761 	radeon_ring_write(ring, 0);
5762 	radeon_ring_write(ring, pd_addr >> 12);
5763 
5764 	/* update SH_MEM_* regs */
5765 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5766 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5767 				 WRITE_DATA_DST_SEL(0)));
5768 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5769 	radeon_ring_write(ring, 0);
5770 	radeon_ring_write(ring, VMID(vm_id));
5771 
5772 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5773 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5774 				 WRITE_DATA_DST_SEL(0)));
5775 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5776 	radeon_ring_write(ring, 0);
5777 
5778 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5779 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5780 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5781 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5782 
5783 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5784 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5785 				 WRITE_DATA_DST_SEL(0)));
5786 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5787 	radeon_ring_write(ring, 0);
5788 	radeon_ring_write(ring, VMID(0));
5789 
5790 	/* HDP flush */
5791 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5792 
5793 	/* bits 0-15 are the VM contexts 0-15 */
5794 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5795 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5796 				 WRITE_DATA_DST_SEL(0)));
5797 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5798 	radeon_ring_write(ring, 0);
5799 	radeon_ring_write(ring, 1 << vm_id);
5800 
5801 	/* wait for the invalidate to complete */
5802 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5803 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5804 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
5805 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5806 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5807 	radeon_ring_write(ring, 0);
5808 	radeon_ring_write(ring, 0); /* ref */
5809 	radeon_ring_write(ring, 0); /* mask */
5810 	radeon_ring_write(ring, 0x20); /* poll interval */
5811 
5812 	/* compute doesn't have PFP */
5813 	if (usepfp) {
5814 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5815 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5816 		radeon_ring_write(ring, 0x0);
5817 	}
5818 }
5819 
5820 /*
5821  * RLC
5822  * The RLC is a multi-purpose microengine that handles a
5823  * variety of functions, the most important of which is
5824  * the interrupt controller.
5825  */
5826 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5827 					  bool enable)
5828 {
5829 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5830 
5831 	if (enable)
5832 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5833 	else
5834 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5835 	WREG32(CP_INT_CNTL_RING0, tmp);
5836 }
5837 
5838 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5839 {
5840 	u32 tmp;
5841 
5842 	tmp = RREG32(RLC_LB_CNTL);
5843 	if (enable)
5844 		tmp |= LOAD_BALANCE_ENABLE;
5845 	else
5846 		tmp &= ~LOAD_BALANCE_ENABLE;
5847 	WREG32(RLC_LB_CNTL, tmp);
5848 }
5849 
5850 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5851 {
5852 	u32 i, j, k;
5853 	u32 mask;
5854 
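	/* wait for the per-CU serdes masters on every SE/SH to go idle,
	 * then for the non-CU (GC/TC) masters
	 */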
5855 	mutex_lock(&rdev->grbm_idx_mutex);
5856 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5857 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5858 			cik_select_se_sh(rdev, i, j);
5859 			for (k = 0; k < rdev->usec_timeout; k++) {
5860 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5861 					break;
5862 				udelay(1);
5863 			}
5864 		}
5865 	}
5866 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5867 	mutex_unlock(&rdev->grbm_idx_mutex);
5868 
5869 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5870 	for (k = 0; k < rdev->usec_timeout; k++) {
5871 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5872 			break;
5873 		udelay(1);
5874 	}
5875 }
5876 
5877 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5878 {
5879 	u32 tmp;
5880 
5881 	tmp = RREG32(RLC_CNTL);
5882 	if (tmp != rlc)
5883 		WREG32(RLC_CNTL, rlc);
5884 }
5885 
5886 static u32 cik_halt_rlc(struct radeon_device *rdev)
5887 {
5888 	u32 data, orig;
5889 
5890 	orig = data = RREG32(RLC_CNTL);
5891 
5892 	if (data & RLC_ENABLE) {
5893 		u32 i;
5894 
5895 		data &= ~RLC_ENABLE;
5896 		WREG32(RLC_CNTL, data);
5897 
5898 		for (i = 0; i < rdev->usec_timeout; i++) {
5899 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5900 				break;
5901 			udelay(1);
5902 		}
5903 
5904 		cik_wait_for_rlc_serdes(rdev);
5905 	}
5906 
5907 	return orig;
5908 }
5909 
5910 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5911 {
5912 	u32 tmp, i, mask;
5913 
5914 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5915 	WREG32(RLC_GPR_REG2, tmp);
5916 
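	/* wait for the RLC to report graphics power and clocks on */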
5917 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5918 	for (i = 0; i < rdev->usec_timeout; i++) {
5919 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5920 			break;
5921 		udelay(1);
5922 	}
5923 
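	/* wait for the RLC to acknowledge the request by clearing REQ */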
5924 	for (i = 0; i < rdev->usec_timeout; i++) {
5925 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5926 			break;
5927 		udelay(1);
5928 	}
5929 }
5930 
5931 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5932 {
5933 	u32 tmp;
5934 
5935 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5936 	WREG32(RLC_GPR_REG2, tmp);
5937 }
5938 
5939 /**
5940  * cik_rlc_stop - stop the RLC ME
5941  *
5942  * @rdev: radeon_device pointer
5943  *
5944  * Halt the RLC ME (MicroEngine) (CIK).
5945  */
5946 static void cik_rlc_stop(struct radeon_device *rdev)
5947 {
5948 	WREG32(RLC_CNTL, 0);
5949 
5950 	cik_enable_gui_idle_interrupt(rdev, false);
5951 
5952 	cik_wait_for_rlc_serdes(rdev);
5953 }
5954 
5955 /**
5956  * cik_rlc_start - start the RLC ME
5957  *
5958  * @rdev: radeon_device pointer
5959  *
5960  * Unhalt the RLC ME (MicroEngine) (CIK).
5961  */
5962 static void cik_rlc_start(struct radeon_device *rdev)
5963 {
5964 	WREG32(RLC_CNTL, RLC_ENABLE);
5965 
5966 	cik_enable_gui_idle_interrupt(rdev, true);
5967 
5968 	udelay(50);
5969 }
5970 
5971 /**
5972  * cik_rlc_resume - setup the RLC hw
5973  *
5974  * @rdev: radeon_device pointer
5975  *
5976  * Initialize the RLC registers, load the ucode,
5977  * and start the RLC (CIK).
5978  * Returns 0 for success, -EINVAL if the ucode is not available.
5979  */
5980 static int cik_rlc_resume(struct radeon_device *rdev)
5981 {
5982 	u32 i, size, tmp;
5983 
5984 	if (!rdev->rlc_fw)
5985 		return -EINVAL;
5986 
5987 	cik_rlc_stop(rdev);
5988 
5989 	/* disable CG */
5990 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5991 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5992 
5993 	si_rlc_reset(rdev);
5994 
5995 	cik_init_pg(rdev);
5996 
5997 	cik_init_cg(rdev);
5998 
5999 	WREG32(RLC_LB_CNTR_INIT, 0);
6000 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6001 
6002 	mutex_lock(&rdev->grbm_idx_mutex);
6003 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6004 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6005 	WREG32(RLC_LB_PARAMS, 0x00600408);
6006 	WREG32(RLC_LB_CNTL, 0x80000004);
6007 	mutex_unlock(&rdev->grbm_idx_mutex);
6008 
6009 	WREG32(RLC_MC_CNTL, 0);
6010 	WREG32(RLC_UCODE_CNTL, 0);
6011 
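	/* load the RLC microcode one dword at a time; new-style firmware
	 * carries its size and version in a header, legacy images use
	 * fixed per-chip sizes
	 */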
6012 	if (rdev->new_fw) {
6013 		const struct rlc_firmware_header_v1_0 *hdr =
6014 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6015 		const __le32 *fw_data = (const __le32 *)
6016 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6017 
6018 		radeon_ucode_print_rlc_hdr(&hdr->header);
6019 
6020 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6021 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6022 		for (i = 0; i < size; i++)
6023 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6024 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6025 	} else {
6026 		const __be32 *fw_data;
6027 
6028 		switch (rdev->family) {
6029 		case CHIP_BONAIRE:
6030 		case CHIP_HAWAII:
6031 		default:
6032 			size = BONAIRE_RLC_UCODE_SIZE;
6033 			break;
6034 		case CHIP_KAVERI:
6035 			size = KV_RLC_UCODE_SIZE;
6036 			break;
6037 		case CHIP_KABINI:
6038 			size = KB_RLC_UCODE_SIZE;
6039 			break;
6040 		case CHIP_MULLINS:
6041 			size = ML_RLC_UCODE_SIZE;
6042 			break;
6043 		}
6044 
6045 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6046 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6047 		for (i = 0; i < size; i++)
6048 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6049 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6050 	}
6051 
6052 	/* XXX - find out what chips support lbpw */
6053 	cik_enable_lbpw(rdev, false);
6054 
6055 	if (rdev->family == CHIP_BONAIRE)
6056 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6057 
6058 	cik_rlc_start(rdev);
6059 
6060 	return 0;
6061 }
6062 
6063 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6064 {
6065 	u32 data, orig, tmp, tmp2;
6066 
6067 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6068 
6069 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6070 		cik_enable_gui_idle_interrupt(rdev, true);
6071 
6072 		tmp = cik_halt_rlc(rdev);
6073 
6074 		mutex_lock(&rdev->grbm_idx_mutex);
6075 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6076 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6077 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6078 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6079 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6080 		mutex_unlock(&rdev->grbm_idx_mutex);
6081 
6082 		cik_update_rlc(rdev, tmp);
6083 
6084 		data |= CGCG_EN | CGLS_EN;
6085 	} else {
6086 		cik_enable_gui_idle_interrupt(rdev, false);
6087 
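		/* dummy read-backs; presumably needed to let pending CB
		 * clock-gating transactions drain before CGCG is disabled
		 */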
6088 		RREG32(CB_CGTT_SCLK_CTRL);
6089 		RREG32(CB_CGTT_SCLK_CTRL);
6090 		RREG32(CB_CGTT_SCLK_CTRL);
6091 		RREG32(CB_CGTT_SCLK_CTRL);
6092 
6093 		data &= ~(CGCG_EN | CGLS_EN);
6094 	}
6095 
6096 	if (orig != data)
6097 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6098 
6099 }
6100 
6101 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6102 {
6103 	u32 data, orig, tmp = 0;
6104 
6105 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6106 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6107 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6108 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6109 				data |= CP_MEM_LS_EN;
6110 				if (orig != data)
6111 					WREG32(CP_MEM_SLP_CNTL, data);
6112 			}
6113 		}
6114 
6115 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6116 		data |= 0x00000001;
6117 		data &= 0xfffffffd;
6118 		if (orig != data)
6119 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6120 
6121 		tmp = cik_halt_rlc(rdev);
6122 
6123 		mutex_lock(&rdev->grbm_idx_mutex);
6124 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6125 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6126 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6127 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6128 		WREG32(RLC_SERDES_WR_CTRL, data);
6129 		mutex_unlock(&rdev->grbm_idx_mutex);
6130 
6131 		cik_update_rlc(rdev, tmp);
6132 
6133 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6134 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6135 			data &= ~SM_MODE_MASK;
6136 			data |= SM_MODE(0x2);
6137 			data |= SM_MODE_ENABLE;
6138 			data &= ~CGTS_OVERRIDE;
6139 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6140 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6141 				data &= ~CGTS_LS_OVERRIDE;
6142 			data &= ~ON_MONITOR_ADD_MASK;
6143 			data |= ON_MONITOR_ADD_EN;
6144 			data |= ON_MONITOR_ADD(0x96);
6145 			if (orig != data)
6146 				WREG32(CGTS_SM_CTRL_REG, data);
6147 		}
6148 	} else {
6149 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6150 		data |= 0x00000003;
6151 		if (orig != data)
6152 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6153 
6154 		data = RREG32(RLC_MEM_SLP_CNTL);
6155 		if (data & RLC_MEM_LS_EN) {
6156 			data &= ~RLC_MEM_LS_EN;
6157 			WREG32(RLC_MEM_SLP_CNTL, data);
6158 		}
6159 
6160 		data = RREG32(CP_MEM_SLP_CNTL);
6161 		if (data & CP_MEM_LS_EN) {
6162 			data &= ~CP_MEM_LS_EN;
6163 			WREG32(CP_MEM_SLP_CNTL, data);
6164 		}
6165 
6166 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6167 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6168 		if (orig != data)
6169 			WREG32(CGTS_SM_CTRL_REG, data);
6170 
6171 		tmp = cik_halt_rlc(rdev);
6172 
6173 		mutex_lock(&rdev->grbm_idx_mutex);
6174 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6175 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6176 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6177 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6178 		WREG32(RLC_SERDES_WR_CTRL, data);
6179 		mutex_unlock(&rdev->grbm_idx_mutex);
6180 
6181 		cik_update_rlc(rdev, tmp);
6182 	}
6183 }
6184 
6185 static const u32 mc_cg_registers[] =
6186 {
6187 	MC_HUB_MISC_HUB_CG,
6188 	MC_HUB_MISC_SIP_CG,
6189 	MC_HUB_MISC_VM_CG,
6190 	MC_XPB_CLK_GAT,
6191 	ATC_MISC_CG,
6192 	MC_CITF_MISC_WR_CG,
6193 	MC_CITF_MISC_RD_CG,
6194 	MC_CITF_MISC_VM_CG,
6195 	VM_L2_CG,
6196 };
6197 
6198 static void cik_enable_mc_ls(struct radeon_device *rdev,
6199 			     bool enable)
6200 {
6201 	int i;
6202 	u32 orig, data;
6203 
6204 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6205 		orig = data = RREG32(mc_cg_registers[i]);
6206 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6207 			data |= MC_LS_ENABLE;
6208 		else
6209 			data &= ~MC_LS_ENABLE;
6210 		if (data != orig)
6211 			WREG32(mc_cg_registers[i], data);
6212 	}
6213 }
6214 
6215 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6216 			       bool enable)
6217 {
6218 	int i;
6219 	u32 orig, data;
6220 
6221 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6222 		orig = data = RREG32(mc_cg_registers[i]);
6223 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6224 			data |= MC_CG_ENABLE;
6225 		else
6226 			data &= ~MC_CG_ENABLE;
6227 		if (data != orig)
6228 			WREG32(mc_cg_registers[i], data);
6229 	}
6230 }
6231 
6232 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6233 				 bool enable)
6234 {
6235 	u32 orig, data;
6236 
6237 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6238 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6239 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6240 	} else {
6241 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6242 		data |= 0xff000000;
6243 		if (data != orig)
6244 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6245 
6246 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6247 		data |= 0xff000000;
6248 		if (data != orig)
6249 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6250 	}
6251 }
6252 
6253 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6254 				 bool enable)
6255 {
6256 	u32 orig, data;
6257 
6258 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6259 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6260 		data |= 0x100;
6261 		if (orig != data)
6262 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6263 
6264 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6265 		data |= 0x100;
6266 		if (orig != data)
6267 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6268 	} else {
6269 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6270 		data &= ~0x100;
6271 		if (orig != data)
6272 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6273 
6274 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6275 		data &= ~0x100;
6276 		if (orig != data)
6277 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6278 	}
6279 }
6280 
6281 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6282 				bool enable)
6283 {
6284 	u32 orig, data;
6285 
6286 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6287 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6288 		data = 0xfff;
6289 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6290 
6291 		orig = data = RREG32(UVD_CGC_CTRL);
6292 		data |= DCM;
6293 		if (orig != data)
6294 			WREG32(UVD_CGC_CTRL, data);
6295 	} else {
6296 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6297 		data &= ~0xfff;
6298 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6299 
6300 		orig = data = RREG32(UVD_CGC_CTRL);
6301 		data &= ~DCM;
6302 		if (orig != data)
6303 			WREG32(UVD_CGC_CTRL, data);
6304 	}
6305 }
6306 
6307 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6308 			       bool enable)
6309 {
6310 	u32 orig, data;
6311 
6312 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6313 
6314 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6315 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6316 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6317 	else
6318 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6319 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6320 
6321 	if (orig != data)
6322 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6323 }
6324 
6325 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6326 				bool enable)
6327 {
6328 	u32 orig, data;
6329 
6330 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6331 
6332 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6333 		data &= ~CLOCK_GATING_DIS;
6334 	else
6335 		data |= CLOCK_GATING_DIS;
6336 
6337 	if (orig != data)
6338 		WREG32(HDP_HOST_PATH_CNTL, data);
6339 }
6340 
6341 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6342 			      bool enable)
6343 {
6344 	u32 orig, data;
6345 
6346 	orig = data = RREG32(HDP_MEM_POWER_LS);
6347 
6348 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6349 		data |= HDP_LS_ENABLE;
6350 	else
6351 		data &= ~HDP_LS_ENABLE;
6352 
6353 	if (orig != data)
6354 		WREG32(HDP_MEM_POWER_LS, data);
6355 }
6356 
6357 void cik_update_cg(struct radeon_device *rdev,
6358 		   u32 block, bool enable)
6359 {
6360 
6361 	if (block & RADEON_CG_BLOCK_GFX) {
6362 		cik_enable_gui_idle_interrupt(rdev, false);
6363 		/* order matters! */
6364 		if (enable) {
6365 			cik_enable_mgcg(rdev, true);
6366 			cik_enable_cgcg(rdev, true);
6367 		} else {
6368 			cik_enable_cgcg(rdev, false);
6369 			cik_enable_mgcg(rdev, false);
6370 		}
6371 		cik_enable_gui_idle_interrupt(rdev, true);
6372 	}
6373 
6374 	if (block & RADEON_CG_BLOCK_MC) {
6375 		if (!(rdev->flags & RADEON_IS_IGP)) {
6376 			cik_enable_mc_mgcg(rdev, enable);
6377 			cik_enable_mc_ls(rdev, enable);
6378 		}
6379 	}
6380 
6381 	if (block & RADEON_CG_BLOCK_SDMA) {
6382 		cik_enable_sdma_mgcg(rdev, enable);
6383 		cik_enable_sdma_mgls(rdev, enable);
6384 	}
6385 
6386 	if (block & RADEON_CG_BLOCK_BIF) {
6387 		cik_enable_bif_mgls(rdev, enable);
6388 	}
6389 
6390 	if (block & RADEON_CG_BLOCK_UVD) {
6391 		if (rdev->has_uvd)
6392 			cik_enable_uvd_mgcg(rdev, enable);
6393 	}
6394 
6395 	if (block & RADEON_CG_BLOCK_HDP) {
6396 		cik_enable_hdp_mgcg(rdev, enable);
6397 		cik_enable_hdp_ls(rdev, enable);
6398 	}
6399 
6400 	if (block & RADEON_CG_BLOCK_VCE) {
6401 		vce_v2_0_enable_mgcg(rdev, enable);
6402 	}
6403 }
6404 
6405 static void cik_init_cg(struct radeon_device *rdev)
6406 {
6407 
6408 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6409 
6410 	if (rdev->has_uvd)
6411 		si_init_uvd_internal_cg(rdev);
6412 
6413 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6414 			     RADEON_CG_BLOCK_SDMA |
6415 			     RADEON_CG_BLOCK_BIF |
6416 			     RADEON_CG_BLOCK_UVD |
6417 			     RADEON_CG_BLOCK_HDP), true);
6418 }
6419 
6420 static void cik_fini_cg(struct radeon_device *rdev)
6421 {
6422 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6423 			     RADEON_CG_BLOCK_SDMA |
6424 			     RADEON_CG_BLOCK_BIF |
6425 			     RADEON_CG_BLOCK_UVD |
6426 			     RADEON_CG_BLOCK_HDP), false);
6427 
6428 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6429 }
6430 
6431 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6432 					  bool enable)
6433 {
6434 	u32 data, orig;
6435 
6436 	orig = data = RREG32(RLC_PG_CNTL);
6437 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6438 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6439 	else
6440 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6441 	if (orig != data)
6442 		WREG32(RLC_PG_CNTL, data);
6443 }
6444 
6445 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6446 					  bool enable)
6447 {
6448 	u32 data, orig;
6449 
6450 	orig = data = RREG32(RLC_PG_CNTL);
6451 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6452 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6453 	else
6454 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6455 	if (orig != data)
6456 		WREG32(RLC_PG_CNTL, data);
6457 }
6458 
6459 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6460 {
6461 	u32 data, orig;
6462 
6463 	orig = data = RREG32(RLC_PG_CNTL);
6464 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6465 		data &= ~DISABLE_CP_PG;
6466 	else
6467 		data |= DISABLE_CP_PG;
6468 	if (orig != data)
6469 		WREG32(RLC_PG_CNTL, data);
6470 }
6471 
6472 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6473 {
6474 	u32 data, orig;
6475 
6476 	orig = data = RREG32(RLC_PG_CNTL);
6477 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6478 		data &= ~DISABLE_GDS_PG;
6479 	else
6480 		data |= DISABLE_GDS_PG;
6481 	if (orig != data)
6482 		WREG32(RLC_PG_CNTL, data);
6483 }
6484 
6485 #define CP_ME_TABLE_SIZE    96
6486 #define CP_ME_TABLE_OFFSET  2048
6487 #define CP_MEC_TABLE_OFFSET 4096
6488 
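/* Copy the jump table of each CP microengine (CE, PFP, ME, MEC1 and, on
 * Kaveri, MEC2) out of its firmware image into the CP power-gating table
 * so the RLC can restore CP state after power gating (the table address is
 * handed to RLC_CP_TABLE_RESTORE in cik_init_gfx_cgpg()).
 */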
6489 void cik_init_cp_pg_table(struct radeon_device *rdev)
6490 {
6491 	volatile u32 *dst_ptr;
6492 	int me, i, max_me = 4;
6493 	u32 bo_offset = 0;
6494 	u32 table_offset, table_size;
6495 
6496 	if (rdev->family == CHIP_KAVERI)
6497 		max_me = 5;
6498 
6499 	if (rdev->rlc.cp_table_ptr == NULL)
6500 		return;
6501 
6502 	/* write the cp table buffer */
6503 	dst_ptr = rdev->rlc.cp_table_ptr;
6504 	for (me = 0; me < max_me; me++) {
6505 		if (rdev->new_fw) {
6506 			const __le32 *fw_data;
6507 			const struct gfx_firmware_header_v1_0 *hdr;
6508 
6509 			if (me == 0) {
6510 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6511 				fw_data = (const __le32 *)
6512 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6513 				table_offset = le32_to_cpu(hdr->jt_offset);
6514 				table_size = le32_to_cpu(hdr->jt_size);
6515 			} else if (me == 1) {
6516 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6517 				fw_data = (const __le32 *)
6518 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6519 				table_offset = le32_to_cpu(hdr->jt_offset);
6520 				table_size = le32_to_cpu(hdr->jt_size);
6521 			} else if (me == 2) {
6522 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6523 				fw_data = (const __le32 *)
6524 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6525 				table_offset = le32_to_cpu(hdr->jt_offset);
6526 				table_size = le32_to_cpu(hdr->jt_size);
6527 			} else if (me == 3) {
6528 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6529 				fw_data = (const __le32 *)
6530 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6531 				table_offset = le32_to_cpu(hdr->jt_offset);
6532 				table_size = le32_to_cpu(hdr->jt_size);
6533 			} else {
6534 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6535 				fw_data = (const __le32 *)
6536 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6537 				table_offset = le32_to_cpu(hdr->jt_offset);
6538 				table_size = le32_to_cpu(hdr->jt_size);
6539 			}
6540 
6541 			for (i = 0; i < table_size; i ++) {
6542 				dst_ptr[bo_offset + i] =
6543 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6544 			}
6545 			bo_offset += table_size;
6546 		} else {
6547 			const __be32 *fw_data;
6548 			table_size = CP_ME_TABLE_SIZE;
6549 
6550 			if (me == 0) {
6551 				fw_data = (const __be32 *)rdev->ce_fw->data;
6552 				table_offset = CP_ME_TABLE_OFFSET;
6553 			} else if (me == 1) {
6554 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6555 				table_offset = CP_ME_TABLE_OFFSET;
6556 			} else if (me == 2) {
6557 				fw_data = (const __be32 *)rdev->me_fw->data;
6558 				table_offset = CP_ME_TABLE_OFFSET;
6559 			} else {
6560 				fw_data = (const __be32 *)rdev->mec_fw->data;
6561 				table_offset = CP_MEC_TABLE_OFFSET;
6562 			}
6563 
6564 			for (i = 0; i < table_size; i ++) {
6565 				dst_ptr[bo_offset + i] =
6566 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6567 			}
6568 			bo_offset += table_size;
6569 		}
6570 	}
6571 }
6572 
6573 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6574 				bool enable)
6575 {
6576 	u32 data, orig;
6577 
6578 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6579 		orig = data = RREG32(RLC_PG_CNTL);
6580 		data |= GFX_PG_ENABLE;
6581 		if (orig != data)
6582 			WREG32(RLC_PG_CNTL, data);
6583 
6584 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6585 		data |= AUTO_PG_EN;
6586 		if (orig != data)
6587 			WREG32(RLC_AUTO_PG_CTRL, data);
6588 	} else {
6589 		orig = data = RREG32(RLC_PG_CNTL);
6590 		data &= ~GFX_PG_ENABLE;
6591 		if (orig != data)
6592 			WREG32(RLC_PG_CNTL, data);
6593 
6594 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6595 		data &= ~AUTO_PG_EN;
6596 		if (orig != data)
6597 			WREG32(RLC_AUTO_PG_CTRL, data);
6598 
6599 		data = RREG32(DB_RENDER_CONTROL);
6600 	}
6601 }
6602 
6603 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6604 {
6605 	u32 mask = 0, tmp, tmp1;
6606 	int i;
6607 
6608 	mutex_lock(&rdev->grbm_idx_mutex);
6609 	cik_select_se_sh(rdev, se, sh);
6610 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6611 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6612 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6613 	mutex_unlock(&rdev->grbm_idx_mutex);
6614 
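	/* the disabled-CU bits sit in the upper 16 bits of both configs;
	 * merge them and shift down, then invert below to get active CUs
	 */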
6615 	tmp &= 0xffff0000;
6616 
6617 	tmp |= tmp1;
6618 	tmp >>= 16;
6619 
6620 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6621 		mask <<= 1;
6622 		mask |= 1;
6623 	}
6624 
6625 	return (~tmp) & mask;
6626 }
6627 
6628 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6629 {
6630 	u32 i, j, k, active_cu_number = 0;
6631 	u32 mask, counter, cu_bitmap;
6632 	u32 tmp = 0;
6633 
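	/* mark at most two always-on CUs per SH in the bitmap and count
	 * every active CU for the power-gating limit below
	 */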
6634 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6635 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6636 			mask = 1;
6637 			cu_bitmap = 0;
6638 			counter = 0;
6639 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6640 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6641 					if (counter < 2)
6642 						cu_bitmap |= mask;
6643 					counter ++;
6644 				}
6645 				mask <<= 1;
6646 			}
6647 
6648 			active_cu_number += counter;
6649 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6650 		}
6651 	}
6652 
6653 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6654 
6655 	tmp = RREG32(RLC_MAX_PG_CU);
6656 	tmp &= ~MAX_PU_CU_MASK;
6657 	tmp |= MAX_PU_CU(active_cu_number);
6658 	WREG32(RLC_MAX_PG_CU, tmp);
6659 }
6660 
6661 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6662 				       bool enable)
6663 {
6664 	u32 data, orig;
6665 
6666 	orig = data = RREG32(RLC_PG_CNTL);
6667 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6668 		data |= STATIC_PER_CU_PG_ENABLE;
6669 	else
6670 		data &= ~STATIC_PER_CU_PG_ENABLE;
6671 	if (orig != data)
6672 		WREG32(RLC_PG_CNTL, data);
6673 }
6674 
6675 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6676 					bool enable)
6677 {
6678 	u32 data, orig;
6679 
6680 	orig = data = RREG32(RLC_PG_CNTL);
6681 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6682 		data |= DYN_PER_CU_PG_ENABLE;
6683 	else
6684 		data &= ~DYN_PER_CU_PG_ENABLE;
6685 	if (orig != data)
6686 		WREG32(RLC_PG_CNTL, data);
6687 }
6688 
6689 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6690 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6691 
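/*
 * RLC_GPM_SCRATCH_ADDR/RLC_GPM_SCRATCH_DATA act as an indirect register
 * pair: cik_init_gfx_cgpg() below writes the target offset once and then
 * streams consecutive values through the data port, which implies the
 * scratch address auto-increments on each data write.  The access
 * pattern, as a sketch:
 *
 *	WREG32(RLC_GPM_SCRATCH_ADDR, offset);
 *	for (i = 0; i < n; i++)
 *		WREG32(RLC_GPM_SCRATCH_DATA, values[i]);
 */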
6692 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6693 {
6694 	u32 data, orig;
6695 	u32 i;
6696 
6697 	if (rdev->rlc.cs_data) {
6698 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6699 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6700 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6701 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6702 	} else {
6703 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6704 		for (i = 0; i < 3; i++)
6705 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6706 	}
6707 	if (rdev->rlc.reg_list) {
6708 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6709 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6710 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6711 	}
6712 
6713 	orig = data = RREG32(RLC_PG_CNTL);
6714 	data |= GFX_PG_SRC;
6715 	if (orig != data)
6716 		WREG32(RLC_PG_CNTL, data);
6717 
6718 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6719 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6720 
6721 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6722 	data &= ~IDLE_POLL_COUNT_MASK;
6723 	data |= IDLE_POLL_COUNT(0x60);
6724 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6725 
6726 	data = 0x10101010;
6727 	WREG32(RLC_PG_DELAY, data);
6728 
6729 	data = RREG32(RLC_PG_DELAY_2);
6730 	data &= ~0xff;
6731 	data |= 0x3;
6732 	WREG32(RLC_PG_DELAY_2, data);
6733 
6734 	data = RREG32(RLC_AUTO_PG_CTRL);
6735 	data &= ~GRBM_REG_SGIT_MASK;
6736 	data |= GRBM_REG_SGIT(0x700);
6737 	WREG32(RLC_AUTO_PG_CTRL, data);
6738 
6739 }
6740 
6741 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6742 {
6743 	cik_enable_gfx_cgpg(rdev, enable);
6744 	cik_enable_gfx_static_mgpg(rdev, enable);
6745 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6746 }
6747 
6748 u32 cik_get_csb_size(struct radeon_device *rdev)
6749 {
6750 	u32 count = 0;
6751 	const struct cs_section_def *sect = NULL;
6752 	const struct cs_extent_def *ext = NULL;
6753 
6754 	if (rdev->rlc.cs_data == NULL)
6755 		return 0;
6756 
6757 	/* begin clear state */
6758 	count += 2;
6759 	/* context control state */
6760 	count += 3;
6761 
6762 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6763 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6764 			if (sect->id == SECT_CONTEXT)
6765 				count += 2 + ext->reg_count;
6766 			else
6767 				return 0;
6768 		}
6769 	}
6770 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6771 	count += 4;
6772 	/* end clear state */
6773 	count += 2;
6774 	/* clear state */
6775 	count += 2;
6776 
6777 	return count;
6778 }
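/*
 * In other words, for a clear-state image made up solely of SECT_CONTEXT
 * extents the size works out to
 *
 *	count = 2 + 3 + sum(2 + reg_count) + 4 + 2 + 2
 *
 * dwords: the fixed preamble/context-control/raster-config/postamble
 * overhead plus a SET_CONTEXT_REG header and register offset per extent.
 */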
6779 
6780 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6781 {
6782 	u32 count = 0, i;
6783 	const struct cs_section_def *sect = NULL;
6784 	const struct cs_extent_def *ext = NULL;
6785 
6786 	if (rdev->rlc.cs_data == NULL)
6787 		return;
6788 	if (buffer == NULL)
6789 		return;
6790 
6791 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6792 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6793 
6794 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6795 	buffer[count++] = cpu_to_le32(0x80000000);
6796 	buffer[count++] = cpu_to_le32(0x80000000);
6797 
6798 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6799 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6800 			if (sect->id == SECT_CONTEXT) {
6801 				buffer[count++] =
6802 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6803 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6804 				for (i = 0; i < ext->reg_count; i++)
6805 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6806 			} else {
6807 				return;
6808 			}
6809 		}
6810 	}
6811 
6812 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6813 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6814 	switch (rdev->family) {
6815 	case CHIP_BONAIRE:
6816 		buffer[count++] = cpu_to_le32(0x16000012);
6817 		buffer[count++] = cpu_to_le32(0x00000000);
6818 		break;
6819 	case CHIP_KAVERI:
6820 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6821 		buffer[count++] = cpu_to_le32(0x00000000);
6822 		break;
6823 	case CHIP_KABINI:
6824 	case CHIP_MULLINS:
6825 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6826 		buffer[count++] = cpu_to_le32(0x00000000);
6827 		break;
6828 	case CHIP_HAWAII:
6829 		buffer[count++] = cpu_to_le32(0x3a00161a);
6830 		buffer[count++] = cpu_to_le32(0x0000002e);
6831 		break;
6832 	default:
6833 		buffer[count++] = cpu_to_le32(0x00000000);
6834 		buffer[count++] = cpu_to_le32(0x00000000);
6835 		break;
6836 	}
6837 
6838 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6839 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6840 
6841 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6842 	buffer[count++] = cpu_to_le32(0);
6843 }
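/*
 * This is meant to be paired with cik_get_csb_size(): the caller sizes
 * the destination buffer with that count, and the dwords emitted above
 * fill it exactly:
 *
 *	dws = cik_get_csb_size(rdev);
 *	... allocate dws dwords ...
 *	cik_get_csb_buffer(rdev, dst_ptr);
 */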
6844 
6845 static void cik_init_pg(struct radeon_device *rdev)
6846 {
6847 	if (rdev->pg_flags) {
6848 		cik_enable_sck_slowdown_on_pu(rdev, true);
6849 		cik_enable_sck_slowdown_on_pd(rdev, true);
6850 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6851 			cik_init_gfx_cgpg(rdev);
6852 			cik_enable_cp_pg(rdev, true);
6853 			cik_enable_gds_pg(rdev, true);
6854 		}
6855 		cik_init_ao_cu_mask(rdev);
6856 		cik_update_gfx_pg(rdev, true);
6857 	}
6858 }
6859 
6860 static void cik_fini_pg(struct radeon_device *rdev)
6861 {
6862 	if (rdev->pg_flags) {
6863 		cik_update_gfx_pg(rdev, false);
6864 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6865 			cik_enable_cp_pg(rdev, false);
6866 			cik_enable_gds_pg(rdev, false);
6867 		}
6868 	}
6869 }
6870 
6871 /*
6872  * Interrupts
6873  * Starting with r6xx, interrupts are handled via a ring buffer.
6874  * Ring buffers are areas of GPU accessible memory that the GPU
6875  * writes interrupt vectors into and the host reads vectors out of.
6876  * There is a rptr (read pointer) that determines where the
6877  * host is currently reading, and a wptr (write pointer)
6878  * which determines where the GPU has written.  When the
6879  * pointers are equal, the ring is idle.  When the GPU
6880  * writes vectors to the ring buffer, it increments the
6881  * wptr.  When there is an interrupt, the host then starts
6882  * fetching commands and processing them until the pointers are
6883  * equal again at which point it updates the rptr.
6884  */
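/*
 * As a simplified sketch, the host-side consumption described above
 * reduces to the following (the real loop, with per-source decoding, is
 * in cik_irq_process() below; entries are 16 bytes, so the pointers
 * advance in steps of 16):
 *
 *	while (rptr != wptr) {
 *		handle_iv_entry(&ring[rptr / 4]);  (hypothetical helper)
 *		rptr = (rptr + 16) & ptr_mask;
 *	}
 *	WREG32(IH_RB_RPTR, rptr);
 */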
6885 
6886 /**
6887  * cik_enable_interrupts - Enable the interrupt ring buffer
6888  *
6889  * @rdev: radeon_device pointer
6890  *
6891  * Enable the interrupt ring buffer (CIK).
6892  */
6893 static void cik_enable_interrupts(struct radeon_device *rdev)
6894 {
6895 	u32 ih_cntl = RREG32(IH_CNTL);
6896 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6897 
6898 	ih_cntl |= ENABLE_INTR;
6899 	ih_rb_cntl |= IH_RB_ENABLE;
6900 	WREG32(IH_CNTL, ih_cntl);
6901 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6902 	rdev->ih.enabled = true;
6903 }
6904 
6905 /**
6906  * cik_disable_interrupts - Disable the interrupt ring buffer
6907  *
6908  * @rdev: radeon_device pointer
6909  *
6910  * Disable the interrupt ring buffer (CIK).
6911  */
6912 static void cik_disable_interrupts(struct radeon_device *rdev)
6913 {
6914 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6915 	u32 ih_cntl = RREG32(IH_CNTL);
6916 
6917 	ih_rb_cntl &= ~IH_RB_ENABLE;
6918 	ih_cntl &= ~ENABLE_INTR;
6919 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6920 	WREG32(IH_CNTL, ih_cntl);
6921 	/* set rptr, wptr to 0 */
6922 	WREG32(IH_RB_RPTR, 0);
6923 	WREG32(IH_RB_WPTR, 0);
6924 	rdev->ih.enabled = false;
6925 	rdev->ih.rptr = 0;
6926 }
6927 
6928 /**
6929  * cik_disable_interrupt_state - Disable all interrupt sources
6930  *
6931  * @rdev: radeon_device pointer
6932  *
6933  * Clear all interrupt enable bits used by the driver (CIK).
6934  */
6935 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6936 {
6937 	u32 tmp;
6938 
6939 	/* gfx ring */
6940 	tmp = RREG32(CP_INT_CNTL_RING0) &
6941 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6942 	WREG32(CP_INT_CNTL_RING0, tmp);
6943 	/* sdma */
6944 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6945 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6946 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6947 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6948 	/* compute queues */
6949 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6950 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6951 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6952 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6953 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6954 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6955 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6956 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6957 	/* grbm */
6958 	WREG32(GRBM_INT_CNTL, 0);
6959 	/* SRBM */
6960 	WREG32(SRBM_INT_CNTL, 0);
6961 	/* vline/vblank, etc. */
6962 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6963 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6964 	if (rdev->num_crtc >= 4) {
6965 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6966 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6967 	}
6968 	if (rdev->num_crtc >= 6) {
6969 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6970 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6971 	}
6972 	/* pflip */
6973 	if (rdev->num_crtc >= 2) {
6974 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6975 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6976 	}
6977 	if (rdev->num_crtc >= 4) {
6978 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6979 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6980 	}
6981 	if (rdev->num_crtc >= 6) {
6982 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6983 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6984 	}
6985 
6986 	/* dac hotplug */
6987 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6988 
6989 	/* digital hotplug - clear the enable bits but preserve polarity */
6990 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6991 	WREG32(DC_HPD1_INT_CONTROL, tmp);
6992 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6993 	WREG32(DC_HPD2_INT_CONTROL, tmp);
6994 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6995 	WREG32(DC_HPD3_INT_CONTROL, tmp);
6996 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6997 	WREG32(DC_HPD4_INT_CONTROL, tmp);
6998 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6999 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7000 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7001 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7002 
7003 }
7004 
7005 /**
7006  * cik_irq_init - init and enable the interrupt ring
7007  *
7008  * @rdev: radeon_device pointer
7009  *
7010  * Allocate a ring buffer for the interrupt controller,
7011  * enable the RLC, disable interrupts, then set up and
7012  * enable the IH ring buffer (CIK).
7013  * Called at device load and resume.
7014  * Returns 0 for success, errors for failure.
7015  */
7016 static int cik_irq_init(struct radeon_device *rdev)
7017 {
7018 	int ret = 0;
7019 	int rb_bufsz;
7020 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7021 
7022 	/* allocate ring */
7023 	ret = r600_ih_ring_alloc(rdev);
7024 	if (ret)
7025 		return ret;
7026 
7027 	/* disable irqs */
7028 	cik_disable_interrupts(rdev);
7029 
7030 	/* init rlc */
7031 	ret = cik_rlc_resume(rdev);
7032 	if (ret) {
7033 		r600_ih_ring_fini(rdev);
7034 		return ret;
7035 	}
7036 
7037 	/* setup interrupt control */
7038 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7039 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7040 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7041 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7042 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7043 	 */
7044 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7045 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7046 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7047 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7048 
7049 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7050 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
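	/* e.g. a 64KB IH ring is 16384 dwords, giving rb_bufsz = 14 */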
7051 
7052 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7053 		      IH_WPTR_OVERFLOW_CLEAR |
7054 		      (rb_bufsz << 1));
7055 
7056 	if (rdev->wb.enabled)
7057 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7058 
7059 	/* set the writeback address whether it's enabled or not */
7060 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7061 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7062 
7063 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7064 
7065 	/* set rptr, wptr to 0 */
7066 	WREG32(IH_RB_RPTR, 0);
7067 	WREG32(IH_RB_WPTR, 0);
7068 
7069 	/* Default settings for IH_CNTL (disabled at first) */
7070 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7071 	/* RPTR_REARM only works if MSIs are enabled */
7072 	if (rdev->msi_enabled)
7073 		ih_cntl |= RPTR_REARM;
7074 	WREG32(IH_CNTL, ih_cntl);
7075 
7076 	/* force the active interrupt state to all disabled */
7077 	cik_disable_interrupt_state(rdev);
7078 
7079 	pci_set_master(rdev->pdev);
7080 
7081 	/* enable irqs */
7082 	cik_enable_interrupts(rdev);
7083 
7084 	return ret;
7085 }
7086 
7087 /**
7088  * cik_irq_set - enable/disable interrupt sources
7089  *
7090  * @rdev: radeon_device pointer
7091  *
7092  * Enable interrupt sources on the GPU (vblanks, hpd,
7093  * etc.) (CIK).
7094  * Returns 0 for success, errors for failure.
7095  */
7096 int cik_irq_set(struct radeon_device *rdev)
7097 {
7098 	u32 cp_int_cntl;
7099 	u32 cp_m1p0;
7100 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7101 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7102 	u32 grbm_int_cntl = 0;
7103 	u32 dma_cntl, dma_cntl1;
7104 
7105 	if (!rdev->irq.installed) {
7106 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7107 		return -EINVAL;
7108 	}
7109 	/* don't enable anything if the ih is disabled */
7110 	if (!rdev->ih.enabled) {
7111 		cik_disable_interrupts(rdev);
7112 		/* force the active interrupt state to all disabled */
7113 		cik_disable_interrupt_state(rdev);
7114 		return 0;
7115 	}
7116 
7117 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7118 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7119 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7120 
7121 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7122 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7123 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7124 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7125 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7126 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7127 
7128 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7129 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7130 
7131 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7132 
7133 	/* enable CP interrupts on all rings */
7134 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7135 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7136 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7137 	}
7138 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7139 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7140 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
7141 		if (ring->me == 1) {
7142 			switch (ring->pipe) {
7143 			case 0:
7144 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7145 				break;
7146 			default:
7147 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7148 				break;
7149 			}
7150 		} else {
7151 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7152 		}
7153 	}
7154 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7155 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7156 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7157 		if (ring->me == 1) {
7158 			switch (ring->pipe) {
7159 			case 0:
7160 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7161 				break;
7162 			default:
7163 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7164 				break;
7165 			}
7166 		} else {
7167 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7168 		}
7169 	}
7170 
7171 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7172 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7173 		dma_cntl |= TRAP_ENABLE;
7174 	}
7175 
7176 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7177 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7178 		dma_cntl1 |= TRAP_ENABLE;
7179 	}
7180 
7181 	if (rdev->irq.crtc_vblank_int[0] ||
7182 	    atomic_read(&rdev->irq.pflip[0])) {
7183 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7184 		crtc1 |= VBLANK_INTERRUPT_MASK;
7185 	}
7186 	if (rdev->irq.crtc_vblank_int[1] ||
7187 	    atomic_read(&rdev->irq.pflip[1])) {
7188 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7189 		crtc2 |= VBLANK_INTERRUPT_MASK;
7190 	}
7191 	if (rdev->irq.crtc_vblank_int[2] ||
7192 	    atomic_read(&rdev->irq.pflip[2])) {
7193 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7194 		crtc3 |= VBLANK_INTERRUPT_MASK;
7195 	}
7196 	if (rdev->irq.crtc_vblank_int[3] ||
7197 	    atomic_read(&rdev->irq.pflip[3])) {
7198 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7199 		crtc4 |= VBLANK_INTERRUPT_MASK;
7200 	}
7201 	if (rdev->irq.crtc_vblank_int[4] ||
7202 	    atomic_read(&rdev->irq.pflip[4])) {
7203 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7204 		crtc5 |= VBLANK_INTERRUPT_MASK;
7205 	}
7206 	if (rdev->irq.crtc_vblank_int[5] ||
7207 	    atomic_read(&rdev->irq.pflip[5])) {
7208 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7209 		crtc6 |= VBLANK_INTERRUPT_MASK;
7210 	}
7211 	if (rdev->irq.hpd[0]) {
7212 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7213 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7214 	}
7215 	if (rdev->irq.hpd[1]) {
7216 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7217 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7218 	}
7219 	if (rdev->irq.hpd[2]) {
7220 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7221 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7222 	}
7223 	if (rdev->irq.hpd[3]) {
7224 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7225 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7226 	}
7227 	if (rdev->irq.hpd[4]) {
7228 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7229 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7230 	}
7231 	if (rdev->irq.hpd[5]) {
7232 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7233 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7234 	}
7235 
7236 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7237 
7238 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7239 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7240 
7241 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7242 
7243 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7244 
7245 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7246 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7247 	if (rdev->num_crtc >= 4) {
7248 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7249 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7250 	}
7251 	if (rdev->num_crtc >= 6) {
7252 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7253 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7254 	}
7255 
7256 	if (rdev->num_crtc >= 2) {
7257 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7258 		       GRPH_PFLIP_INT_MASK);
7259 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7260 		       GRPH_PFLIP_INT_MASK);
7261 	}
7262 	if (rdev->num_crtc >= 4) {
7263 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7264 		       GRPH_PFLIP_INT_MASK);
7265 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7266 		       GRPH_PFLIP_INT_MASK);
7267 	}
7268 	if (rdev->num_crtc >= 6) {
7269 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7270 		       GRPH_PFLIP_INT_MASK);
7271 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7272 		       GRPH_PFLIP_INT_MASK);
7273 	}
7274 
7275 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7276 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7277 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7278 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7279 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7280 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7281 
7282 	/* posting read */
7283 	RREG32(SRBM_STATUS);
7284 
7285 	return 0;
7286 }
7287 
7288 /**
7289  * cik_irq_ack - ack interrupt sources
7290  *
7291  * @rdev: radeon_device pointer
7292  *
7293  * Ack interrupt sources on the GPU (vblanks, hpd,
7294  * etc.) (CIK).  Certain interrupt sources are sw
7295  * generated and do not require an explicit ack.
7296  */
7297 static inline void cik_irq_ack(struct radeon_device *rdev)
7298 {
7299 	u32 tmp;
7300 
7301 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7302 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7303 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7304 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7305 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7306 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7307 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7308 
7309 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7310 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7311 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7312 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7313 	if (rdev->num_crtc >= 4) {
7314 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7315 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7316 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7317 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7318 	}
7319 	if (rdev->num_crtc >= 6) {
7320 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7321 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7322 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7323 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7324 	}
7325 
7326 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7327 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7328 		       GRPH_PFLIP_INT_CLEAR);
7329 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7330 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7331 		       GRPH_PFLIP_INT_CLEAR);
7332 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7333 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7334 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7335 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7336 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7337 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7338 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7339 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7340 
7341 	if (rdev->num_crtc >= 4) {
7342 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7343 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7344 			       GRPH_PFLIP_INT_CLEAR);
7345 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7346 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7347 			       GRPH_PFLIP_INT_CLEAR);
7348 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7349 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7350 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7351 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7352 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7353 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7354 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7355 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7356 	}
7357 
7358 	if (rdev->num_crtc >= 6) {
7359 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7360 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7361 			       GRPH_PFLIP_INT_CLEAR);
7362 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7363 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7364 			       GRPH_PFLIP_INT_CLEAR);
7365 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7366 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7367 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7368 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7369 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7370 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7371 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7372 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7373 	}
7374 
7375 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7376 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7377 		tmp |= DC_HPDx_INT_ACK;
7378 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7379 	}
7380 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7381 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7382 		tmp |= DC_HPDx_INT_ACK;
7383 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7384 	}
7385 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7386 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7387 		tmp |= DC_HPDx_INT_ACK;
7388 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7389 	}
7390 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7391 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7392 		tmp |= DC_HPDx_INT_ACK;
7393 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7394 	}
7395 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7396 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7397 		tmp |= DC_HPDx_INT_ACK;
7398 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7399 	}
7400 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7401 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7402 		tmp |= DC_HPDx_INT_ACK;
7403 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7404 	}
7405 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7406 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7407 		tmp |= DC_HPDx_RX_INT_ACK;
7408 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7409 	}
7410 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7411 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7412 		tmp |= DC_HPDx_RX_INT_ACK;
7413 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7414 	}
7415 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7416 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7417 		tmp |= DC_HPDx_RX_INT_ACK;
7418 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7419 	}
7420 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7421 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7422 		tmp |= DC_HPDx_RX_INT_ACK;
7423 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7424 	}
7425 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7426 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7427 		tmp |= DC_HPDx_RX_INT_ACK;
7428 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7429 	}
7430 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7431 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7432 		tmp |= DC_HPDx_RX_INT_ACK;
7433 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7434 	}
7435 }
7436 
7437 /**
7438  * cik_irq_disable - disable interrupts
7439  *
7440  * @rdev: radeon_device pointer
7441  *
7442  * Disable interrupts on the hw (CIK).
7443  */
7444 static void cik_irq_disable(struct radeon_device *rdev)
7445 {
7446 	cik_disable_interrupts(rdev);
7447 	/* Wait and acknowledge irq */
7448 	mdelay(1);
7449 	cik_irq_ack(rdev);
7450 	cik_disable_interrupt_state(rdev);
7451 }
7452 
7453 /**
7454  * cik_irq_suspend - disable interrupts for suspend
7455  *
7456  * @rdev: radeon_device pointer
7457  *
7458  * Disable interrupts and stop the RLC (CIK).
7459  * Used for suspend.
7460  */
7461 static void cik_irq_suspend(struct radeon_device *rdev)
7462 {
7463 	cik_irq_disable(rdev);
7464 	cik_rlc_stop(rdev);
7465 }
7466 
7467 /**
7468  * cik_irq_fini - tear down interrupt support
7469  *
7470  * @rdev: radeon_device pointer
7471  *
7472  * Disable interrupts on the hw and free the IH ring
7473  * buffer (CIK).
7474  * Used for driver unload.
7475  */
7476 static void cik_irq_fini(struct radeon_device *rdev)
7477 {
7478 	cik_irq_suspend(rdev);
7479 	r600_ih_ring_fini(rdev);
7480 }
7481 
7482 /**
7483  * cik_get_ih_wptr - get the IH ring buffer wptr
7484  *
7485  * @rdev: radeon_device pointer
7486  *
7487  * Get the IH ring buffer wptr from either the register
7488  * or the writeback memory buffer (CIK).  Also check for
7489  * ring buffer overflow and deal with it.
7490  * Used by cik_irq_process().
7491  * Returns the value of the wptr.
7492  */
7493 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7494 {
7495 	u32 wptr, tmp;
7496 
7497 	if (rdev->wb.enabled)
7498 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7499 	else
7500 		wptr = RREG32(IH_RB_WPTR);
7501 
7502 	if (wptr & RB_OVERFLOW) {
7503 		wptr &= ~RB_OVERFLOW;
7504 		/* When a ring buffer overflow happens, start parsing interrupts
7505 		 * from the last vector that was not overwritten (wptr + 16).
7506 		 * Hopefully this allows us to catch up.
7507 		 */
7508 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7509 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7510 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7511 		tmp = RREG32(IH_RB_CNTL);
7512 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7513 		WREG32(IH_RB_CNTL, tmp);
7514 	}
7515 	return (wptr & rdev->ih.ptr_mask);
7516 }
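/*
 * ptr_mask is ring_size - 1 (ring sizes are powers of two), so the
 * masking above is a cheap modulo; with the usual 64KB IH ring, for
 * example:
 *
 *	(wptr + 16) & 0xffff  ==  (wptr + 16) % 65536
 */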
7517 
7518 /* CIK IV Ring
7519  * Each IV ring entry is 128 bits:
7520  * [7:0]    - interrupt source id
7521  * [31:8]   - reserved
7522  * [59:32]  - interrupt source data
7523  * [63:60]  - reserved
7524  * [71:64]  - RINGID
7525  *            CP:
7526  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7527  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7528  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7529  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7530  *            PIPE_ID - ME0 0=3D
7531  *                    - ME1&2 compute dispatcher (4 pipes each)
7532  *            SDMA:
7533  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7534  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7535  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7536  * [79:72]  - VMID
7537  * [95:80]  - PASID
7538  * [127:96] - reserved
7539  */
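/*
 * As a minimal sketch, one entry decodes from the four dwords read in
 * cik_irq_process() below (dw3 is reserved; field positions follow the
 * layout above):
 *
 *	dw0 = le32_to_cpu(rdev->ih.ring[ring_index + 0]);
 *	dw1 = le32_to_cpu(rdev->ih.ring[ring_index + 1]);
 *	dw2 = le32_to_cpu(rdev->ih.ring[ring_index + 2]);
 *	src_id   = dw0 & 0xff;			bits [7:0]
 *	src_data = dw1 & 0xfffffff;		bits [59:32]
 *	ring_id  = dw2 & 0xff;			bits [71:64]
 *	vm_id    = (dw2 >> 8) & 0xff;		bits [79:72]
 *	pasid    = (dw2 >> 16) & 0xffff;	bits [95:80]
 */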
7540 /**
7541  * cik_irq_process - interrupt handler
7542  *
7543  * @rdev: radeon_device pointer
7544  *
7545  * Interrupt handler (CIK).  Walk the IH ring,
7546  * ack interrupts and schedule work to handle
7547  * interrupt events.
7548  * Returns irq process return code.
7549  */
7550 irqreturn_t cik_irq_process(struct radeon_device *rdev)
7551 {
7552 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7553 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7554 	u32 wptr;
7555 	u32 rptr;
7556 	u32 src_id, src_data, ring_id;
7557 	u8 me_id, pipe_id, queue_id;
7558 	u32 ring_index;
7559 	bool queue_hotplug = false;
7560 	bool queue_dp = false;
7561 	bool queue_reset = false;
7562 	u32 addr, status, mc_client;
7563 	bool queue_thermal = false;
7564 
7565 	if (!rdev->ih.enabled || rdev->shutdown)
7566 		return IRQ_NONE;
7567 
7568 	wptr = cik_get_ih_wptr(rdev);
7569 
7570 restart_ih:
7571 	/* is somebody else already processing irqs? */
7572 	if (atomic_xchg(&rdev->ih.lock, 1))
7573 		return IRQ_NONE;
7574 
7575 	rptr = rdev->ih.rptr;
7576 	DRM_DEBUG_VBLANK("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7577 
7578 	/* Order reading of wptr vs. reading of IH ring data */
7579 	rmb();
7580 
7581 	/* display interrupts */
7582 	cik_irq_ack(rdev);
7583 
7584 	while (rptr != wptr) {
7585 		/* wptr/rptr are in bytes! */
7586 		ring_index = rptr / 4;
7587 
7588 #pragma GCC diagnostic push
7589 #pragma GCC diagnostic ignored "-Wcast-qual"
7590 		radeon_kfd_interrupt(rdev,
7591 				(const void *) &rdev->ih.ring[ring_index]);
7592 #pragma GCC diagnostic pop
7593 
7594 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7595 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7596 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7597 
7598 		switch (src_id) {
7599 		case 1: /* D1 vblank/vline */
7600 			switch (src_data) {
7601 			case 0: /* D1 vblank */
7602 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7603 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7604 
7605 				if (rdev->irq.crtc_vblank_int[0]) {
7606 					drm_handle_vblank(rdev->ddev, 0);
7607 					rdev->pm.vblank_sync = true;
7608 					wake_up(&rdev->irq.vblank_queue);
7609 				}
7610 				if (atomic_read(&rdev->irq.pflip[0]))
7611 					radeon_crtc_handle_vblank(rdev, 0);
7612 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7613 				DRM_DEBUG_VBLANK("IH: D1 vblank\n");
7614 
7615 				break;
7616 			case 1: /* D1 vline */
7617 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7618 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7619 
7620 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7621 				DRM_DEBUG_VBLANK("IH: D1 vline\n");
7622 
7623 				break;
7624 			default:
7625 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7626 				break;
7627 			}
7628 			break;
7629 		case 2: /* D2 vblank/vline */
7630 			switch (src_data) {
7631 			case 0: /* D2 vblank */
7632 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7633 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7634 
7635 				if (rdev->irq.crtc_vblank_int[1]) {
7636 					drm_handle_vblank(rdev->ddev, 1);
7637 					rdev->pm.vblank_sync = true;
7638 					wake_up(&rdev->irq.vblank_queue);
7639 				}
7640 				if (atomic_read(&rdev->irq.pflip[1]))
7641 					radeon_crtc_handle_vblank(rdev, 1);
7642 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7643 				DRM_DEBUG_VBLANK("IH: D2 vblank\n");
7644 
7645 				break;
7646 			case 1: /* D2 vline */
7647 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7648 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7649 
7650 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7651 				DRM_DEBUG_VBLANK("IH: D2 vline\n");
7652 
7653 				break;
7654 			default:
7655 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7656 				break;
7657 			}
7658 			break;
7659 		case 3: /* D3 vblank/vline */
7660 			switch (src_data) {
7661 			case 0: /* D3 vblank */
7662 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7663 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7664 
7665 				if (rdev->irq.crtc_vblank_int[2]) {
7666 					drm_handle_vblank(rdev->ddev, 2);
7667 					rdev->pm.vblank_sync = true;
7668 					wake_up(&rdev->irq.vblank_queue);
7669 				}
7670 				if (atomic_read(&rdev->irq.pflip[2]))
7671 					radeon_crtc_handle_vblank(rdev, 2);
7672 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7673 				DRM_DEBUG_VBLANK("IH: D3 vblank\n");
7674 
7675 				break;
7676 			case 1: /* D3 vline */
7677 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7678 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7679 
7680 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7681 				DRM_DEBUG_VBLANK("IH: D3 vline\n");
7682 
7683 				break;
7684 			default:
7685 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7686 				break;
7687 			}
7688 			break;
7689 		case 4: /* D4 vblank/vline */
7690 			switch (src_data) {
7691 			case 0: /* D4 vblank */
7692 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7693 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7694 
7695 				if (rdev->irq.crtc_vblank_int[3]) {
7696 					drm_handle_vblank(rdev->ddev, 3);
7697 					rdev->pm.vblank_sync = true;
7698 					wake_up(&rdev->irq.vblank_queue);
7699 				}
7700 				if (atomic_read(&rdev->irq.pflip[3]))
7701 					radeon_crtc_handle_vblank(rdev, 3);
7702 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7703 				DRM_DEBUG_VBLANK("IH: D4 vblank\n");
7704 
7705 				break;
7706 			case 1: /* D4 vline */
7707 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7708 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7709 
7710 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7711 				DRM_DEBUG_VBLANK("IH: D4 vline\n");
7712 
7713 				break;
7714 			default:
7715 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7716 				break;
7717 			}
7718 			break;
7719 		case 5: /* D5 vblank/vline */
7720 			switch (src_data) {
7721 			case 0: /* D5 vblank */
7722 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7723 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7724 
7725 				if (rdev->irq.crtc_vblank_int[4]) {
7726 					drm_handle_vblank(rdev->ddev, 4);
7727 					rdev->pm.vblank_sync = true;
7728 					wake_up(&rdev->irq.vblank_queue);
7729 				}
7730 				if (atomic_read(&rdev->irq.pflip[4]))
7731 					radeon_crtc_handle_vblank(rdev, 4);
7732 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7733 				DRM_DEBUG_VBLANK("IH: D5 vblank\n");
7734 
7735 				break;
7736 			case 1: /* D5 vline */
7737 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7738 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7739 
7740 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7741 				DRM_DEBUG_VBLANK("IH: D5 vline\n");
7742 
7743 				break;
7744 			default:
7745 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7746 				break;
7747 			}
7748 			break;
7749 		case 6: /* D6 vblank/vline */
7750 			switch (src_data) {
7751 			case 0: /* D6 vblank */
7752 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7753 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7754 
7755 				if (rdev->irq.crtc_vblank_int[5]) {
7756 					drm_handle_vblank(rdev->ddev, 5);
7757 					rdev->pm.vblank_sync = true;
7758 					wake_up(&rdev->irq.vblank_queue);
7759 				}
7760 				if (atomic_read(&rdev->irq.pflip[5]))
7761 					radeon_crtc_handle_vblank(rdev, 5);
7762 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7763 				DRM_DEBUG_VBLANK("IH: D6 vblank\n");
7764 
7765 				break;
7766 			case 1: /* D6 vline */
7767 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7768 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7769 
7770 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7771 				DRM_DEBUG_VBLANK("IH: D6 vline\n");
7772 
7773 				break;
7774 			default:
7775 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7776 				break;
7777 			}
7778 			break;
7779 		case 8: /* D1 page flip */
7780 		case 10: /* D2 page flip */
7781 		case 12: /* D3 page flip */
7782 		case 14: /* D4 page flip */
7783 		case 16: /* D5 page flip */
7784 		case 18: /* D6 page flip */
7785 			DRM_DEBUG_VBLANK("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7786 			if (radeon_use_pflipirq > 0)
7787 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7788 			break;
7789 		case 42: /* HPD hotplug */
7790 			switch (src_data) {
7791 			case 0:
7792 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7793 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7794 
7795 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7796 				queue_hotplug = true;
7797 				DRM_DEBUG("IH: HPD1\n");
7798 
7799 				break;
7800 			case 1:
7801 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7802 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7803 
7804 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7805 				queue_hotplug = true;
7806 				DRM_DEBUG("IH: HPD2\n");
7807 
7808 				break;
7809 			case 2:
7810 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7811 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7812 
7813 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7814 				queue_hotplug = true;
7815 				DRM_DEBUG("IH: HPD3\n");
7816 
7817 				break;
7818 			case 3:
7819 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7820 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7821 
7822 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7823 				queue_hotplug = true;
7824 				DRM_DEBUG("IH: HPD4\n");
7825 
7826 				break;
7827 			case 4:
7828 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7829 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7830 
7831 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7832 				queue_hotplug = true;
7833 				DRM_DEBUG("IH: HPD5\n");
7834 
7835 				break;
7836 			case 5:
7837 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7838 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7839 
7840 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7841 				queue_hotplug = true;
7842 				DRM_DEBUG("IH: HPD6\n");
7843 
7844 				break;
7845 			case 6:
7846 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7847 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7848 
7849 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7850 				queue_dp = true;
7851 				DRM_DEBUG("IH: HPD_RX 1\n");
7852 
7853 				break;
7854 			case 7:
7855 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7856 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7857 
7858 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7859 				queue_dp = true;
7860 				DRM_DEBUG("IH: HPD_RX 2\n");
7861 
7862 				break;
7863 			case 8:
7864 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7865 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7866 
7867 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7868 				queue_dp = true;
7869 				DRM_DEBUG("IH: HPD_RX 3\n");
7870 
7871 				break;
7872 			case 9:
7873 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7874 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7875 
7876 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7877 				queue_dp = true;
7878 				DRM_DEBUG("IH: HPD_RX 4\n");
7879 
7880 				break;
7881 			case 10:
7882 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7883 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7884 
7885 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7886 				queue_dp = true;
7887 				DRM_DEBUG("IH: HPD_RX 5\n");
7888 
7889 				break;
7890 			case 11:
7891 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7892 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7893 
7894 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7895 				queue_dp = true;
7896 				DRM_DEBUG("IH: HPD_RX 6\n");
7897 
7898 				break;
7899 			default:
7900 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7901 				break;
7902 			}
7903 			break;
7904 		case 96:
7905 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7906 			WREG32(SRBM_INT_ACK, 0x1);
7907 			break;
7908 		case 124: /* UVD */
7909 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7910 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7911 			break;
7912 		case 146:
7913 		case 147:
7914 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7915 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7916 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7917 			/* reset addr and status */
7918 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7919 			if (addr == 0x0 && status == 0x0)
7920 				break;
7921 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7922 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7923 				addr);
7924 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7925 				status);
7926 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7927 			break;
7928 		case 167: /* VCE */
7929 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7930 			switch (src_data) {
7931 			case 0:
7932 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7933 				break;
7934 			case 1:
7935 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7936 				break;
7937 			default:
7938 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7939 				break;
7940 			}
7941 			break;
7942 		case 176: /* GFX RB CP_INT */
7943 		case 177: /* GFX IB CP_INT */
7944 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7945 			break;
7946 		case 181: /* CP EOP event */
7947 			DRM_DEBUG("IH: CP EOP\n");
7948 			/* XXX check the bitfield order! */
7949 			me_id = (ring_id & 0x60) >> 5;
7950 			pipe_id = (ring_id & 0x18) >> 3;
7951 			queue_id = (ring_id & 0x7) >> 0;
7952 			switch (me_id) {
7953 			case 0:
7954 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7955 				break;
7956 			case 1:
7957 			case 2:
7958 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7959 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7960 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7961 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7962 				break;
7963 			}
7964 			break;
7965 		case 184: /* CP Privileged reg access */
7966 			DRM_ERROR("Illegal register access in command stream\n");
7967 			/* XXX check the bitfield order! */
7968 			me_id = (ring_id & 0x60) >> 5;
7969 			pipe_id = (ring_id & 0x18) >> 3;
7970 			queue_id = (ring_id & 0x7) >> 0;
7971 			switch (me_id) {
7972 			case 0:
7973 				/* This results in a full GPU reset, but all we need to do is soft
7974 				 * reset the CP for gfx
7975 				 */
7976 				queue_reset = true;
7977 				break;
7978 			case 1:
7979 				/* XXX compute */
7980 				queue_reset = true;
7981 				break;
7982 			case 2:
7983 				/* XXX compute */
7984 				queue_reset = true;
7985 				break;
7986 			}
7987 			break;
7988 		case 185: /* CP Privileged inst */
7989 			DRM_ERROR("Illegal instruction in command stream\n");
7990 			/* XXX check the bitfield order! */
7991 			me_id = (ring_id & 0x60) >> 5;
7992 			pipe_id = (ring_id & 0x18) >> 3;
7993 			queue_id = (ring_id & 0x7) >> 0;
7994 			switch (me_id) {
7995 			case 0:
7996 				/* This results in a full GPU reset, but all we need to do is soft
7997 				 * reset the CP for gfx
7998 				 */
7999 				queue_reset = true;
8000 				break;
8001 			case 1:
8002 				/* XXX compute */
8003 				queue_reset = true;
8004 				break;
8005 			case 2:
8006 				/* XXX compute */
8007 				queue_reset = true;
8008 				break;
8009 			}
8010 			break;
8011 		case 224: /* SDMA trap event */
8012 			/* XXX check the bitfield order! */
8013 			me_id = (ring_id & 0x3) >> 0;
8014 			queue_id = (ring_id & 0xc) >> 2;
8015 			DRM_DEBUG("IH: SDMA trap\n");
8016 			switch (me_id) {
8017 			case 0:
8018 				switch (queue_id) {
8019 				case 0:
8020 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8021 					break;
8022 				case 1:
8023 					/* XXX compute */
8024 					break;
8025 				case 2:
8026 					/* XXX compute */
8027 					break;
8028 				}
8029 				break;
8030 			case 1:
8031 				switch (queue_id) {
8032 				case 0:
8033 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8034 					break;
8035 				case 1:
8036 					/* XXX compute */
8037 					break;
8038 				case 2:
8039 					/* XXX compute */
8040 					break;
8041 				}
8042 				break;
8043 			}
8044 			break;
8045 		case 230: /* thermal low to high */
8046 			DRM_DEBUG("IH: thermal low to high\n");
8047 			rdev->pm.dpm.thermal.high_to_low = false;
8048 			queue_thermal = true;
8049 			break;
8050 		case 231: /* thermal high to low */
8051 			DRM_DEBUG("IH: thermal high to low\n");
8052 			rdev->pm.dpm.thermal.high_to_low = true;
8053 			queue_thermal = true;
8054 			break;
8055 		case 233: /* GUI IDLE */
8056 			DRM_DEBUG("IH: GUI idle\n");
8057 			break;
8058 		case 241: /* SDMA Privileged inst */
8059 		case 247: /* SDMA Privileged inst */
8060 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8061 			/* XXX check the bitfield order! */
8062 			me_id = (ring_id & 0x3) >> 0;
8063 			queue_id = (ring_id & 0xc) >> 2;
8064 			switch (me_id) {
8065 			case 0:
8066 				switch (queue_id) {
8067 				case 0:
8068 					queue_reset = true;
8069 					break;
8070 				case 1:
8071 					/* XXX compute */
8072 					queue_reset = true;
8073 					break;
8074 				case 2:
8075 					/* XXX compute */
8076 					queue_reset = true;
8077 					break;
8078 				}
8079 				break;
8080 			case 1:
8081 				switch (queue_id) {
8082 				case 0:
8083 					queue_reset = true;
8084 					break;
8085 				case 1:
8086 					/* XXX compute */
8087 					queue_reset = true;
8088 					break;
8089 				case 2:
8090 					/* XXX compute */
8091 					queue_reset = true;
8092 					break;
8093 				}
8094 				break;
8095 			}
8096 			break;
8097 		default:
8098 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8099 			break;
8100 		}
8101 
8102 		/* wptr/rptr are in bytes! */
8103 		rptr += 16;
8104 		rptr &= rdev->ih.ptr_mask;
8105 		WREG32(IH_RB_RPTR, rptr);
8106 	}
8107 	if (queue_dp)
8108 		schedule_work(&rdev->dp_work);
8109 	if (queue_hotplug)
8110 		schedule_delayed_work(&rdev->hotplug_work, 0);
8111 	if (queue_reset) {
8112 		rdev->needs_reset = true;
8113 		wake_up_all(&rdev->fence_queue);
8114 	}
8115 	if (queue_thermal)
8116 		schedule_work(&rdev->pm.dpm.thermal.work);
8117 	rdev->ih.rptr = rptr;
8118 	atomic_set(&rdev->ih.lock, 0);
8119 
8120 	/* make sure wptr hasn't changed while processing */
8121 	wptr = cik_get_ih_wptr(rdev);
8122 	if (wptr != rptr)
8123 		goto restart_ih;
8124 
8125 	return IRQ_HANDLED;
8126 }
8127 
8128 /*
8129  * startup/shutdown callbacks
8130  */
8131 static void cik_uvd_init(struct radeon_device *rdev)
8132 {
8133 	int r;
8134 
8135 	if (!rdev->has_uvd)
8136 		return;
8137 
8138 	r = radeon_uvd_init(rdev);
8139 	if (r) {
8140 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8141 		/*
8142 		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
8143 		 * cik_uvd_start() fail early, so nothing would happen there
8144 		 * anyway.  Going through that code is pointless, so just
8145 		 * disable UVD here.
8146 		 */
8147 		rdev->has_uvd = 0;
8148 		return;
8149 	}
8150 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8151 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8152 }
8153 
8154 static void cik_uvd_start(struct radeon_device *rdev)
8155 {
8156 	int r;
8157 
8158 	if (!rdev->has_uvd)
8159 		return;
8160 
8161 	r = radeon_uvd_resume(rdev);
8162 	if (r) {
8163 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8164 		goto error;
8165 	}
8166 	r = uvd_v4_2_resume(rdev);
8167 	if (r) {
8168 		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8169 		goto error;
8170 	}
8171 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8172 	if (r) {
8173 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8174 		goto error;
8175 	}
8176 	return;
8177 
8178 error:
8179 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8180 }
8181 
8182 static void cik_uvd_resume(struct radeon_device *rdev)
8183 {
8184 	struct radeon_ring *ring;
8185 	int r;
8186 
8187 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8188 		return;
8189 
8190 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8191 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8192 	if (r) {
8193 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8194 		return;
8195 	}
8196 	r = uvd_v1_0_init(rdev);
8197 	if (r) {
8198 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8199 		return;
8200 	}
8201 }
8202 
8203 static void cik_vce_init(struct radeon_device *rdev)
8204 {
8205 	int r;
8206 
8207 	if (!rdev->has_vce)
8208 		return;
8209 
8210 	r = radeon_vce_init(rdev);
8211 	if (r) {
8212 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8213 		/*
8214 		 * At this point rdev->vce.vcpu_bo is NULL, which makes
8215 		 * cik_vce_start() fail early, so nothing would happen there
8216 		 * anyway.  Going through that code is pointless, so just
8217 		 * disable VCE here.
8218 		 */
8219 		rdev->has_vce = 0;
8220 		return;
8221 	}
8222 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8223 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8224 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8225 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8226 }
8227 
8228 static void cik_vce_start(struct radeon_device *rdev)
8229 {
8230 	int r;
8231 
8232 	if (!rdev->has_vce)
8233 		return;
8234 
8235 	r = radeon_vce_resume(rdev);
8236 	if (r) {
8237 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8238 		goto error;
8239 	}
8240 	r = vce_v2_0_resume(rdev);
8241 	if (r) {
8242 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8243 		goto error;
8244 	}
8245 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8246 	if (r) {
8247 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8248 		goto error;
8249 	}
8250 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8251 	if (r) {
8252 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8253 		goto error;
8254 	}
8255 	return;
8256 
8257 error:
8258 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8259 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8260 }
8261 
8262 static void cik_vce_resume(struct radeon_device *rdev)
8263 {
8264 	struct radeon_ring *ring;
8265 	int r;
8266 
8267 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8268 		return;
8269 
8270 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8271 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8272 	if (r) {
8273 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8274 		return;
8275 	}
8276 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8277 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8278 	if (r) {
8279 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8280 		return;
8281 	}
8282 	r = vce_v1_0_init(rdev);
8283 	if (r) {
8284 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8285 		return;
8286 	}
8287 }
8288 
8289 /**
8290  * cik_startup - program the asic to a functional state
8291  *
8292  * @rdev: radeon_device pointer
8293  *
8294  * Programs the asic to a functional state (CIK).
8295  * Called by cik_init() and cik_resume().
8296  * Returns 0 for success, error for failure.
8297  */
8298 static int cik_startup(struct radeon_device *rdev)
8299 {
8300 	struct radeon_ring *ring;
8301 	u32 nop;
8302 	int r;
8303 
8304 	/* enable pcie gen2/3 link */
8305 	cik_pcie_gen3_enable(rdev);
8306 	/* enable aspm */
8307 	cik_program_aspm(rdev);
8308 
8309 	/* scratch needs to be initialized before MC */
8310 	r = r600_vram_scratch_init(rdev);
8311 	if (r)
8312 		return r;
8313 
8314 	cik_mc_program(rdev);
8315 
8316 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8317 		r = ci_mc_load_microcode(rdev);
8318 		if (r) {
8319 			DRM_ERROR("Failed to load MC firmware!\n");
8320 			return r;
8321 		}
8322 	}
8323 
8324 	r = cik_pcie_gart_enable(rdev);
8325 	if (r)
8326 		return r;
8327 	cik_gpu_init(rdev);
8328 
8329 	/* allocate rlc buffers */
8330 	if (rdev->flags & RADEON_IS_IGP) {
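		/*
		 * IGPs carry their RLC save/restore register list in the
		 * driver: Kaveri uses the Spectre list, the other CIK IGPs
		 * use the Kalindi list.
		 */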
8331 		if (rdev->family == CHIP_KAVERI) {
8332 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8333 			rdev->rlc.reg_list_size =
8334 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8335 		} else {
8336 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8337 			rdev->rlc.reg_list_size =
8338 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8339 		}
8340 	}
8341 	rdev->rlc.cs_data = ci_cs_data;
8342 	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8343 	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8344 	r = sumo_rlc_init(rdev);
8345 	if (r) {
8346 		DRM_ERROR("Failed to init rlc BOs!\n");
8347 		return r;
8348 	}
8349 
8350 	/* allocate wb buffer */
8351 	r = radeon_wb_init(rdev);
8352 	if (r)
8353 		return r;
8354 
8355 	/* allocate mec buffers */
8356 	r = cik_mec_init(rdev);
8357 	if (r) {
8358 		DRM_ERROR("Failed to init MEC BOs!\n");
8359 		return r;
8360 	}
8361 
8362 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8363 	if (r) {
8364 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8365 		return r;
8366 	}
8367 
8368 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8369 	if (r) {
8370 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8371 		return r;
8372 	}
8373 
8374 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8375 	if (r) {
8376 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8377 		return r;
8378 	}
8379 
8380 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8381 	if (r) {
8382 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8383 		return r;
8384 	}
8385 
8386 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8387 	if (r) {
8388 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8389 		return r;
8390 	}
8391 
8392 	cik_uvd_start(rdev);
8393 	cik_vce_start(rdev);
8394 
8395 	/* Enable IRQ */
8396 	if (!rdev->irq.installed) {
8397 		r = radeon_irq_kms_init(rdev);
8398 		if (r)
8399 			return r;
8400 	}
8401 
8402 	r = cik_irq_init(rdev);
8403 	if (r) {
8404 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8405 		radeon_irq_kms_fini(rdev);
8406 		return r;
8407 	}
8408 	cik_irq_set(rdev);
8409 
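
	/* Ring padding NOPs: older Hawaii CP firmware does not handle the
	 * type-3 NOP packet, so fall back to type-2 packets there; all
	 * other cases pad with a single type-3 NOP using the maximum
	 * count (0x3FFF).
	 */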
8410 	if (rdev->family == CHIP_HAWAII) {
8411 		if (rdev->new_fw)
8412 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8413 		else
8414 			nop = RADEON_CP_PACKET2;
8415 	} else {
8416 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8417 	}
8418 
8419 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8420 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8421 			     nop);
8422 	if (r)
8423 		return r;
8424 
8425 	/* set up the compute queues */
8426 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8427 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8428 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8429 			     nop);
8430 	if (r)
8431 		return r;
8432 	ring->me = 1; /* first MEC */
8433 	ring->pipe = 0; /* first pipe */
8434 	ring->queue = 0; /* first queue */
8435 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8436 
8437 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8438 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8439 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8440 			     nop);
8441 	if (r)
8442 		return r;
8443 	/* dGPUs only have 1 MEC */
8444 	ring->me = 1; /* first MEC */
8445 	ring->pipe = 0; /* first pipe */
8446 	ring->queue = 1; /* second queue */
8447 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8448 
8449 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8450 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8451 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8452 	if (r)
8453 		return r;
8454 
8455 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8456 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8457 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8458 	if (r)
8459 		return r;
8460 
8461 	r = cik_cp_resume(rdev);
8462 	if (r)
8463 		return r;
8464 
8465 	r = cik_sdma_resume(rdev);
8466 	if (r)
8467 		return r;
8468 
8469 	cik_uvd_resume(rdev);
8470 	cik_vce_resume(rdev);
8471 
8472 	r = radeon_ib_pool_init(rdev);
8473 	if (r) {
8474 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8475 		return r;
8476 	}
8477 
8478 	r = radeon_vm_manager_init(rdev);
8479 	if (r) {
8480 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8481 		return r;
8482 	}
8483 
8484 	r = radeon_audio_init(rdev);
8485 	if (r)
8486 		return r;
8487 
8488 	r = radeon_kfd_resume(rdev);
8489 	if (r)
8490 		return r;
8491 
8492 	return 0;
8493 }
8494 
8495 /**
8496  * cik_resume - resume the asic to a functional state
8497  *
8498  * @rdev: radeon_device pointer
8499  *
8500  * Programs the asic to a functional state (CIK).
8501  * Called at resume.
8502  * Returns 0 for success, error for failure.
8503  */
8504 int cik_resume(struct radeon_device *rdev)
8505 {
8506 	int r;
8507 
8508 	/* post card */
8509 	atom_asic_init(rdev->mode_info.atom_context);
8510 
8511 	/* init golden registers */
8512 	cik_init_golden_registers(rdev);
8513 
8514 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8515 		radeon_pm_resume(rdev);
8516 
8517 	rdev->accel_working = true;
8518 	r = cik_startup(rdev);
8519 	if (r) {
8520 		DRM_ERROR("cik startup failed on resume\n");
8521 		rdev->accel_working = false;
8522 		return r;
8523 	}
8524 
8525 	return r;
8527 }
8528 
8529 /**
8530  * cik_suspend - suspend the asic
8531  *
8532  * @rdev: radeon_device pointer
8533  *
8534  * Bring the chip into a state suitable for suspend (CIK).
8535  * Called at suspend.
8536  * Returns 0 for success.
8537  */
8538 int cik_suspend(struct radeon_device *rdev)
8539 {
8540 	radeon_kfd_suspend(rdev);
8541 	radeon_pm_suspend(rdev);
8542 	radeon_audio_fini(rdev);
8543 	radeon_vm_manager_fini(rdev);
8544 	cik_cp_enable(rdev, false);
8545 	cik_sdma_enable(rdev, false);
8546 	if (rdev->has_uvd) {
8547 		uvd_v1_0_fini(rdev);
8548 		radeon_uvd_suspend(rdev);
8549 	}
8550 	if (rdev->has_vce)
8551 		radeon_vce_suspend(rdev);
8552 	cik_fini_pg(rdev);
8553 	cik_fini_cg(rdev);
8554 	cik_irq_suspend(rdev);
8555 	radeon_wb_disable(rdev);
8556 	cik_pcie_gart_disable(rdev);
8557 	return 0;
8558 }
8559 
8560 /* The plan is to move initialization into this function and use
8561  * helper functions so that radeon_device_init does little more
8562  * than call asic specific functions. This should also make it
8563  * possible to remove a bunch of callback functions,
8564  * like vram_info.
8565  */
8566 /**
8567  * cik_init - asic specific driver and hw init
8568  *
8569  * @rdev: radeon_device pointer
8570  *
8571  * Setup asic specific driver variables and program the hw
8572  * to a functional state (CIK).
8573  * Called at driver startup.
8574  * Returns 0 for success, errors for failure.
8575  */
8576 int cik_init(struct radeon_device *rdev)
8577 {
8578 	struct radeon_ring *ring;
8579 	int r;
8580 
8581 	/* Read BIOS */
8582 	if (!radeon_get_bios(rdev)) {
8583 		if (ASIC_IS_AVIVO(rdev))
8584 			return -EINVAL;
8585 	}
8586 	/* Must be an ATOMBIOS */
8587 	if (!rdev->is_atom_bios) {
8588 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8589 		return -EINVAL;
8590 	}
8591 	r = radeon_atombios_init(rdev);
8592 	if (r)
8593 		return r;
8594 
8595 	/* Post card if necessary */
8596 	if (!radeon_card_posted(rdev)) {
8597 		if (!rdev->bios) {
8598 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8599 			return -EINVAL;
8600 		}
8601 		DRM_INFO("GPU not posted. posting now...\n");
8602 		atom_asic_init(rdev->mode_info.atom_context);
8603 	}
8604 	/* init golden registers */
8605 	cik_init_golden_registers(rdev);
8606 	/* Initialize scratch registers */
8607 	cik_scratch_init(rdev);
8608 	/* Initialize surface registers */
8609 	radeon_surface_init(rdev);
8610 	/* Initialize clocks */
8611 	radeon_get_clock_info(rdev->ddev);
8612 
8613 	/* Fence driver */
8614 	r = radeon_fence_driver_init(rdev);
8615 	if (r)
8616 		return r;
8617 
8618 	/* initialize memory controller */
8619 	r = cik_mc_init(rdev);
8620 	if (r)
8621 		return r;
8622 	/* Memory manager */
8623 	r = radeon_bo_init(rdev);
8624 	if (r)
8625 		return r;
8626 
8627 	if (rdev->flags & RADEON_IS_IGP) {
8628 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8629 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8630 			r = cik_init_microcode(rdev);
8631 			if (r) {
8632 				DRM_ERROR("Failed to load firmware!\n");
8633 				return r;
8634 			}
8635 		}
8636 	} else {
8637 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8638 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8639 		    !rdev->mc_fw) {
8640 			r = cik_init_microcode(rdev);
8641 			if (r) {
8642 				DRM_ERROR("Failed to load firmware!\n");
8643 				return r;
8644 			}
8645 		}
8646 	}
8647 
8648 	/* Initialize power management */
8649 	radeon_pm_init(rdev);
8650 
8651 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8652 	ring->ring_obj = NULL;
8653 	r600_ring_init(rdev, ring, 1024 * 1024);
8654 
8655 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8656 	ring->ring_obj = NULL;
8657 	r600_ring_init(rdev, ring, 1024 * 1024);
8658 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8659 	if (r)
8660 		return r;
8661 
8662 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8663 	ring->ring_obj = NULL;
8664 	r600_ring_init(rdev, ring, 1024 * 1024);
8665 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8666 	if (r)
8667 		return r;
8668 
8669 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8670 	ring->ring_obj = NULL;
8671 	r600_ring_init(rdev, ring, 256 * 1024);
8672 
8673 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8674 	ring->ring_obj = NULL;
8675 	r600_ring_init(rdev, ring, 256 * 1024);
8676 
8677 	cik_uvd_init(rdev);
8678 	cik_vce_init(rdev);
8679 
8680 	rdev->ih.ring_obj = NULL;
8681 	r600_ih_ring_init(rdev, 64 * 1024);
8682 
8683 	r = r600_pcie_gart_init(rdev);
8684 	if (r)
8685 		return r;
8686 
8687 	rdev->accel_working = true;
8688 	r = cik_startup(rdev);
8689 	if (r) {
8690 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8691 		cik_cp_fini(rdev);
8692 		cik_sdma_fini(rdev);
8693 		cik_irq_fini(rdev);
8694 		sumo_rlc_fini(rdev);
8695 		cik_mec_fini(rdev);
8696 		radeon_wb_fini(rdev);
8697 		radeon_ib_pool_fini(rdev);
8698 		radeon_vm_manager_fini(rdev);
8699 		radeon_irq_kms_fini(rdev);
8700 		cik_pcie_gart_fini(rdev);
8701 		rdev->accel_working = false;
8702 	}
8703 
8704 	/* Don't start up if the MC ucode is missing.
8705 	 * The default clocks and voltages before the MC ucode
8706 	 * is loaded are not sufficient for advanced operations.
8707 	 */
8708 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8709 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8710 		return -EINVAL;
8711 	}
8712 
8713 	return 0;
8714 }
8715 
8716 /**
8717  * cik_fini - asic specific driver and hw fini
8718  *
8719  * @rdev: radeon_device pointer
8720  *
8721  * Tear down the asic specific driver variables and program the hw
8722  * to an idle state (CIK).
8723  * Called at driver unload.
8724  */
8725 void cik_fini(struct radeon_device *rdev)
8726 {
8727 	radeon_pm_fini(rdev);
8728 	cik_cp_fini(rdev);
8729 	cik_sdma_fini(rdev);
8730 	cik_fini_pg(rdev);
8731 	cik_fini_cg(rdev);
8732 	cik_irq_fini(rdev);
8733 	sumo_rlc_fini(rdev);
8734 	cik_mec_fini(rdev);
8735 	radeon_wb_fini(rdev);
8736 	radeon_vm_manager_fini(rdev);
8737 	radeon_ib_pool_fini(rdev);
8738 	radeon_irq_kms_fini(rdev);
8739 	uvd_v1_0_fini(rdev);
8740 	radeon_uvd_fini(rdev);
8741 	radeon_vce_fini(rdev);
8742 	cik_pcie_gart_fini(rdev);
8743 	r600_vram_scratch_fini(rdev);
8744 	radeon_gem_fini(rdev);
8745 	radeon_fence_driver_fini(rdev);
8746 	radeon_bo_fini(rdev);
8747 	radeon_atombios_fini(rdev);
8748 	cik_fini_microcode(rdev);
8749 	kfree(rdev->bios);
8750 	rdev->bios = NULL;
8751 }
8752 
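
/**
 * dce8_program_fmt - program the FMT bit depth reduction block
 *
 * @encoder: drm encoder being set up
 *
 * Program dithering or truncation in the FMT block of the crtc
 * driving this encoder, based on the monitor bpc and the connector
 * dither property (CIK). LVDS/eDP is set up by the atom tables and
 * analog encoders need no FMT programming.
 */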
8753 void dce8_program_fmt(struct drm_encoder *encoder)
8754 {
8755 	struct drm_device *dev = encoder->dev;
8756 	struct radeon_device *rdev = dev->dev_private;
8757 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8758 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8759 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8760 	int bpc = 0;
8761 	u32 tmp = 0;
8762 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8763 
8764 	if (connector) {
8765 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8766 		bpc = radeon_get_monitor_bpc(connector);
8767 		dither = radeon_connector->dither;
8768 	}
8769 
8770 	/* LVDS/eDP FMT is set up by atom */
8771 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8772 		return;
8773 
8774 	/* not needed for analog */
8775 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8776 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8777 		return;
8778 
8779 	if (bpc == 0)
8780 		return;
8781 
8782 	switch (bpc) {
8783 	case 6:
8784 		if (dither == RADEON_FMT_DITHER_ENABLE)
8785 			/* XXX sort out optimal dither settings */
8786 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8787 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8788 		else
8789 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8790 		break;
8791 	case 8:
8792 		if (dither == RADEON_FMT_DITHER_ENABLE)
8793 			/* XXX sort out optimal dither settings */
8794 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8795 				FMT_RGB_RANDOM_ENABLE |
8796 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8797 		else
8798 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8799 		break;
8800 	case 10:
8801 		if (dither == RADEON_FMT_DITHER_ENABLE)
8802 			/* XXX sort out optimal dither settings */
8803 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8804 				FMT_RGB_RANDOM_ENABLE |
8805 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8806 		else
8807 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8808 		break;
8809 	default:
8810 		/* not needed */
8811 		break;
8812 	}
8813 
8814 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8815 }
8816 
8817 /* display watermark setup */
8818 /**
8819  * dce8_line_buffer_adjust - Set up the line buffer
8820  *
8821  * @rdev: radeon_device pointer
8822  * @radeon_crtc: the selected display controller
8823  * @mode: the current display mode on the selected display
8824  * controller
8825  *
8826  * Set up the line buffer allocation for
8827  * the selected display controller (CIK).
8828  * Returns the line buffer size in pixels.
8829  */
8830 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8831 				   struct radeon_crtc *radeon_crtc,
8832 				   struct drm_display_mode *mode)
8833 {
8834 	u32 tmp, buffer_alloc, i;
8835 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8836 	/*
8837 	 * Line Buffer Setup
8838 	 * There are 6 line buffers, one for each display controller.
8839 	 * There are 3 partitions per LB. Select the number of partitions
8840 	 * to enable based on the display width.  For display widths larger
8841 	 * than 4096, you need to use 2 display controllers and combine
8842 	 * them using the stereo blender.
8843 	 */
8844 	if (radeon_crtc->base.enabled && mode) {
8845 		if (mode->crtc_hdisplay < 1920) {
8846 			tmp = 1;
8847 			buffer_alloc = 2;
8848 		} else if (mode->crtc_hdisplay < 2560) {
8849 			tmp = 2;
8850 			buffer_alloc = 2;
8851 		} else if (mode->crtc_hdisplay < 4096) {
8852 			tmp = 0;
8853 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8854 		} else {
8855 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8856 			tmp = 0;
8857 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8858 		}
8859 	} else {
8860 		tmp = 1;
8861 		buffer_alloc = 0;
8862 	}
8863 
8864 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8865 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8866 
8867 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8868 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8869 	for (i = 0; i < rdev->usec_timeout; i++) {
8870 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8871 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8872 			break;
8873 		udelay(1);
8874 	}
8875 
8876 	if (radeon_crtc->base.enabled && mode) {
8877 		switch (tmp) {
8878 		case 0:
8879 		default:
8880 			return 4096 * 2;
8881 		case 1:
8882 			return 1920 * 2;
8883 		case 2:
8884 			return 2560 * 2;
8885 		}
8886 	}
8887 
8888 	/* controller not enabled, so no lb used */
8889 	return 0;
8890 }
8891 
8892 /**
8893  * cik_get_number_of_dram_channels - get the number of dram channels
8894  *
8895  * @rdev: radeon_device pointer
8896  *
8897  * Look up the number of video ram channels (CIK).
8898  * Used for display watermark bandwidth calculations
8899  * Returns the number of dram channels
8900  */
8901 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8902 {
8903 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8904 
8905 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8906 	case 0:
8907 	default:
8908 		return 1;
8909 	case 1:
8910 		return 2;
8911 	case 2:
8912 		return 4;
8913 	case 3:
8914 		return 8;
8915 	case 4:
8916 		return 3;
8917 	case 5:
8918 		return 6;
8919 	case 6:
8920 		return 10;
8921 	case 7:
8922 		return 12;
8923 	case 8:
8924 		return 16;
8925 	}
8926 }
8927 
8928 struct dce8_wm_params {
8929 	u32 dram_channels; /* number of dram channels */
8930 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8931 	u32 sclk;          /* engine clock in kHz */
8932 	u32 disp_clk;      /* display clock in kHz */
8933 	u32 src_width;     /* viewport width */
8934 	u32 active_time;   /* active display time in ns */
8935 	u32 blank_time;    /* blank time in ns */
8936 	bool interlaced;    /* mode is interlaced */
8937 	fixed20_12 vsc;    /* vertical scale ratio */
8938 	u32 num_heads;     /* number of active crtcs */
8939 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8940 	u32 lb_size;       /* line buffer allocated to pipe */
8941 	u32 vtaps;         /* vertical scaler taps */
8942 };
8943 
8944 /**
8945  * dce8_dram_bandwidth - get the dram bandwidth
8946  *
8947  * @wm: watermark calculation data
8948  *
8949  * Calculate the raw dram bandwidth (CIK).
8950  * Used for display watermark bandwidth calculations
8951  * Returns the dram bandwidth in MBytes/s
8952  */
8953 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8954 {
8955 	/* Calculate raw DRAM Bandwidth */
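	/* bandwidth (MB/s) = yclk (kHz -> MHz) * dram channels * 4 * 0.7 dram efficiency */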
8956 	fixed20_12 dram_efficiency; /* 0.7 */
8957 	fixed20_12 yclk, dram_channels, bandwidth;
8958 	fixed20_12 a;
8959 
8960 	a.full = dfixed_const(1000);
8961 	yclk.full = dfixed_const(wm->yclk);
8962 	yclk.full = dfixed_div(yclk, a);
8963 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8964 	a.full = dfixed_const(10);
8965 	dram_efficiency.full = dfixed_const(7);
8966 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8967 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8968 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8969 
8970 	return dfixed_trunc(bandwidth);
8971 }
8972 
8973 /**
8974  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8975  *
8976  * @wm: watermark calculation data
8977  *
8978  * Calculate the dram bandwidth used for display (CIK).
8979  * Used for display watermark bandwidth calculations
8980  * Returns the dram bandwidth for display in MBytes/s
8981  */
8982 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8983 {
8984 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8985 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8986 	fixed20_12 yclk, dram_channels, bandwidth;
8987 	fixed20_12 a;
8988 
8989 	a.full = dfixed_const(1000);
8990 	yclk.full = dfixed_const(wm->yclk);
8991 	yclk.full = dfixed_div(yclk, a);
8992 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8993 	a.full = dfixed_const(10);
8994 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8995 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8996 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8997 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8998 
8999 	return dfixed_trunc(bandwidth);
9000 }
9001 
9002 /**
9003  * dce8_data_return_bandwidth - get the data return bandwidth
9004  *
9005  * @wm: watermark calculation data
9006  *
9007  * Calculate the data return bandwidth used for display (CIK).
9008  * Used for display watermark bandwidth calculations
9009  * Returns the data return bandwidth in MBytes/s
9010  */
9011 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9012 {
9013 	/* Calculate the display Data return Bandwidth */
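	/* bandwidth (MB/s) = sclk (kHz -> MHz) * 32 bytes per clock * 0.8 return efficiency */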
9014 	fixed20_12 return_efficiency; /* 0.8 */
9015 	fixed20_12 sclk, bandwidth;
9016 	fixed20_12 a;
9017 
9018 	a.full = dfixed_const(1000);
9019 	sclk.full = dfixed_const(wm->sclk);
9020 	sclk.full = dfixed_div(sclk, a);
9021 	a.full = dfixed_const(10);
9022 	return_efficiency.full = dfixed_const(8);
9023 	return_efficiency.full = dfixed_div(return_efficiency, a);
9024 	a.full = dfixed_const(32);
9025 	bandwidth.full = dfixed_mul(a, sclk);
9026 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9027 
9028 	return dfixed_trunc(bandwidth);
9029 }
9030 
9031 /**
9032  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9033  *
9034  * @wm: watermark calculation data
9035  *
9036  * Calculate the dmif bandwidth used for display (CIK).
9037  * Used for display watermark bandwidth calculations
9038  * Returns the dmif bandwidth in MBytes/s
9039  */
9040 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9041 {
9042 	/* Calculate the DMIF Request Bandwidth */
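	/* bandwidth (MB/s) = disp_clk (kHz -> MHz) * 32 bytes per clock * 0.8 request efficiency */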
9043 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9044 	fixed20_12 disp_clk, bandwidth;
9045 	fixed20_12 a, b;
9046 
9047 	a.full = dfixed_const(1000);
9048 	disp_clk.full = dfixed_const(wm->disp_clk);
9049 	disp_clk.full = dfixed_div(disp_clk, a);
9050 	a.full = dfixed_const(32);
9051 	b.full = dfixed_mul(a, disp_clk);
9052 
9053 	a.full = dfixed_const(10);
9054 	disp_clk_request_efficiency.full = dfixed_const(8);
9055 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9056 
9057 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9058 
9059 	return dfixed_trunc(bandwidth);
9060 }
9061 
9062 /**
9063  * dce8_available_bandwidth - get the min available bandwidth
9064  *
9065  * @wm: watermark calculation data
9066  *
9067  * Calculate the min available bandwidth used for display (CIK).
9068  * Used for display watermark bandwidth calculations
9069  * Returns the min available bandwidth in MBytes/s
9070  */
9071 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9072 {
9073 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9074 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9075 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9076 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9077 
9078 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9079 }
9080 
9081 /**
9082  * dce8_average_bandwidth - get the average available bandwidth
9083  *
9084  * @wm: watermark calculation data
9085  *
9086  * Calculate the average available bandwidth used for display (CIK).
9087  * Used for display watermark bandwidth calculations
9088  * Returns the average available bandwidth in MBytes/s
9089  */
9090 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9091 {
9092 	/* Calculate the display mode Average Bandwidth
9093 	 * DisplayMode should contain the source and destination dimensions,
9094 	 * timing, etc.
9095 	 */
9096 	fixed20_12 bpp;
9097 	fixed20_12 line_time;
9098 	fixed20_12 src_width;
9099 	fixed20_12 bandwidth;
9100 	fixed20_12 a;
9101 
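	/* average bandwidth = src_width * bytes/pixel * vertical scale
	 * ratio / line time (line time converted from ns to us)
	 */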
9102 	a.full = dfixed_const(1000);
9103 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9104 	line_time.full = dfixed_div(line_time, a);
9105 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9106 	src_width.full = dfixed_const(wm->src_width);
9107 	bandwidth.full = dfixed_mul(src_width, bpp);
9108 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9109 	bandwidth.full = dfixed_div(bandwidth, line_time);
9110 
9111 	return dfixed_trunc(bandwidth);
9112 }
9113 
9114 /**
9115  * dce8_latency_watermark - get the latency watermark
9116  *
9117  * @wm: watermark calculation data
9118  *
9119  * Calculate the latency watermark (CIK).
9120  * Used for display watermark bandwidth calculations
9121  * Returns the latency watermark in ns
9122  */
9123 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9124 {
9125 	/* First calculate the latency in ns */
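	/* total latency = MC latency + time for the other heads' chunk and
	 * cursor returns + display controller pipe latency; if the line
	 * buffer cannot be refilled within the active display time, the
	 * shortfall is added below.
	 */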
9126 	u32 mc_latency = 2000; /* 2000 ns. */
9127 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9128 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9129 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9130 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9131 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9132 		(wm->num_heads * cursor_line_pair_return_time);
9133 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9134 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9135 	u32 tmp, dmif_size = 12288;
9136 	fixed20_12 a, b, c;
9137 
9138 	if (wm->num_heads == 0)
9139 		return 0;
9140 
9141 	a.full = dfixed_const(2);
9142 	b.full = dfixed_const(1);
9143 	if ((wm->vsc.full > a.full) ||
9144 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9145 	    (wm->vtaps >= 5) ||
9146 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9147 		max_src_lines_per_dst_line = 4;
9148 	else
9149 		max_src_lines_per_dst_line = 2;
9150 
9151 	a.full = dfixed_const(available_bandwidth);
9152 	b.full = dfixed_const(wm->num_heads);
9153 	a.full = dfixed_div(a, b);
9154 
9155 	b.full = dfixed_const(mc_latency + 512);
9156 	c.full = dfixed_const(wm->disp_clk);
9157 	b.full = dfixed_div(b, c);
9158 
9159 	c.full = dfixed_const(dmif_size);
9160 	b.full = dfixed_div(c, b);
9161 
9162 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9163 
9164 	b.full = dfixed_const(1000);
9165 	c.full = dfixed_const(wm->disp_clk);
9166 	b.full = dfixed_div(c, b);
9167 	c.full = dfixed_const(wm->bytes_per_pixel);
9168 	b.full = dfixed_mul(b, c);
9169 
9170 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9171 
9172 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9173 	b.full = dfixed_const(1000);
9174 	c.full = dfixed_const(lb_fill_bw);
9175 	b.full = dfixed_div(c, b);
9176 	a.full = dfixed_div(a, b);
9177 	line_fill_time = dfixed_trunc(a);
9178 
9179 	if (line_fill_time < wm->active_time)
9180 		return latency;
9181 	else
9182 		return latency + (line_fill_time - wm->active_time);
9184 }
9185 
9186 /**
9187  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9188  * average and available dram bandwidth
9189  *
9190  * @wm: watermark calculation data
9191  *
9192  * Check if the display average bandwidth fits in the display
9193  * dram bandwidth (CIK).
9194  * Used for display watermark bandwidth calculations
9195  * Returns true if the display fits, false if not.
9196  */
9197 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9198 {
9199 	return dce8_average_bandwidth(wm) <=
9200 		(dce8_dram_bandwidth_for_display(wm) / wm->num_heads);
9204 }
9205 
9206 /**
9207  * dce8_average_bandwidth_vs_available_bandwidth - check
9208  * average and available bandwidth
9209  *
9210  * @wm: watermark calculation data
9211  *
9212  * Check if the display average bandwidth fits in the display
9213  * available bandwidth (CIK).
9214  * Used for display watermark bandwidth calculations
9215  * Returns true if the display fits, false if not.
9216  */
9217 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9218 {
9219 	return dce8_average_bandwidth(wm) <=
9220 		(dce8_available_bandwidth(wm) / wm->num_heads);
9224 }
9225 
9226 /**
9227  * dce8_check_latency_hiding - check latency hiding
9228  *
9229  * @wm: watermark calculation data
9230  *
9231  * Check latency hiding (CIK).
9232  * Used for display watermark bandwidth calculations
9233  * Returns true if the display fits, false if not.
9234  */
9235 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9236 {
9237 	u32 lb_partitions = wm->lb_size / wm->src_width;
9238 	u32 line_time = wm->active_time + wm->blank_time;
9239 	u32 latency_tolerant_lines;
9240 	u32 latency_hiding;
9241 	fixed20_12 a;
9242 
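	/* a vertically scaled mode, or a line buffer with too few
	 * partitions for the scaler taps, can only tolerate one line of
	 * latency; otherwise up to two lines can be hidden.
	 */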
9243 	a.full = dfixed_const(1);
9244 	if (wm->vsc.full > a.full)
9245 		latency_tolerant_lines = 1;
9246 	else {
9247 		if (lb_partitions <= (wm->vtaps + 1))
9248 			latency_tolerant_lines = 1;
9249 		else
9250 			latency_tolerant_lines = 2;
9251 	}
9252 
9253 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9254 
9255 	return dce8_latency_watermark(wm) <= latency_hiding;
9259 }
9260 
9261 /**
9262  * dce8_program_watermarks - program display watermarks
9263  *
9264  * @rdev: radeon_device pointer
9265  * @radeon_crtc: the selected display controller
9266  * @lb_size: line buffer size
9267  * @num_heads: number of display controllers in use
9268  *
9269  * Calculate and program the display watermarks for the
9270  * selected display controller (CIK).
9271  */
9272 static void dce8_program_watermarks(struct radeon_device *rdev,
9273 				    struct radeon_crtc *radeon_crtc,
9274 				    u32 lb_size, u32 num_heads)
9275 {
9276 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9277 	struct dce8_wm_params wm_low, wm_high;
9278 	u32 pixel_period;
9279 	u32 line_time = 0;
9280 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9281 	u32 tmp, wm_mask;
9282 
9283 	if (radeon_crtc->base.enabled && num_heads && mode) {
9284 		pixel_period = 1000000 / (u32)mode->clock;
9285 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9286 
9287 		/* watermark for high clocks */
9288 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9289 		    rdev->pm.dpm_enabled) {
9290 			wm_high.yclk =
9291 				radeon_dpm_get_mclk(rdev, false) * 10;
9292 			wm_high.sclk =
9293 				radeon_dpm_get_sclk(rdev, false) * 10;
9294 		} else {
9295 			wm_high.yclk = rdev->pm.current_mclk * 10;
9296 			wm_high.sclk = rdev->pm.current_sclk * 10;
9297 		}
9298 
9299 		wm_high.disp_clk = mode->clock;
9300 		wm_high.src_width = mode->crtc_hdisplay;
9301 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9302 		wm_high.blank_time = line_time - wm_high.active_time;
9303 		wm_high.interlaced = false;
9304 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9305 			wm_high.interlaced = true;
9306 		wm_high.vsc = radeon_crtc->vsc;
9307 		wm_high.vtaps = 1;
9308 		if (radeon_crtc->rmx_type != RMX_OFF)
9309 			wm_high.vtaps = 2;
9310 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9311 		wm_high.lb_size = lb_size;
9312 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9313 		wm_high.num_heads = num_heads;
9314 
9315 		/* set for high clocks */
9316 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9317 
9318 		/* possibly force display priority to high */
9319 		/* should really do this at mode validation time... */
9320 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9321 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9322 		    !dce8_check_latency_hiding(&wm_high) ||
9323 		    (rdev->disp_priority == 2)) {
9324 			DRM_DEBUG_KMS("force priority to high\n");
9325 		}
9326 
9327 		/* watermark for low clocks */
9328 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9329 		    rdev->pm.dpm_enabled) {
9330 			wm_low.yclk =
9331 				radeon_dpm_get_mclk(rdev, true) * 10;
9332 			wm_low.sclk =
9333 				radeon_dpm_get_sclk(rdev, true) * 10;
9334 		} else {
9335 			wm_low.yclk = rdev->pm.current_mclk * 10;
9336 			wm_low.sclk = rdev->pm.current_sclk * 10;
9337 		}
9338 
9339 		wm_low.disp_clk = mode->clock;
9340 		wm_low.src_width = mode->crtc_hdisplay;
9341 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9342 		wm_low.blank_time = line_time - wm_low.active_time;
9343 		wm_low.interlaced = false;
9344 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9345 			wm_low.interlaced = true;
9346 		wm_low.vsc = radeon_crtc->vsc;
9347 		wm_low.vtaps = 1;
9348 		if (radeon_crtc->rmx_type != RMX_OFF)
9349 			wm_low.vtaps = 2;
9350 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9351 		wm_low.lb_size = lb_size;
9352 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9353 		wm_low.num_heads = num_heads;
9354 
9355 		/* set for low clocks */
9356 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9357 
9358 		/* possibly force display priority to high */
9359 		/* should really do this at mode validation time... */
9360 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9361 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9362 		    !dce8_check_latency_hiding(&wm_low) ||
9363 		    (rdev->disp_priority == 2)) {
9364 			DRM_DEBUG_KMS("force priority to high\n");
9365 		}
9366 
9367 		/* Save number of lines the linebuffer leads before the scanout */
9368 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9369 	}
9370 
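	/* watermark A is programmed with the high clock value and
	 * watermark B with the low clock value; the values are also
	 * saved below so DPM can adjust them when the clocks change.
	 */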
9371 	/* select wm A */
9372 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9373 	tmp = wm_mask;
9374 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9375 	tmp |= LATENCY_WATERMARK_MASK(1);
9376 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9377 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9378 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9379 		LATENCY_HIGH_WATERMARK(line_time)));
9380 	/* select wm B */
9381 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9382 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9383 	tmp |= LATENCY_WATERMARK_MASK(2);
9384 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9385 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9386 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9387 		LATENCY_HIGH_WATERMARK(line_time)));
9388 	/* restore original selection */
9389 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9390 
9391 	/* save values for DPM */
9392 	radeon_crtc->line_time = line_time;
9393 	radeon_crtc->wm_high = latency_watermark_a;
9394 	radeon_crtc->wm_low = latency_watermark_b;
9395 }
9396 
9397 /**
9398  * dce8_bandwidth_update - program display watermarks
9399  *
9400  * @rdev: radeon_device pointer
9401  *
9402  * Calculate and program the display watermarks and line
9403  * buffer allocation (CIK).
9404  */
9405 void dce8_bandwidth_update(struct radeon_device *rdev)
9406 {
9407 	struct drm_display_mode *mode = NULL;
9408 	u32 num_heads = 0, lb_size;
9409 	int i;
9410 
9411 	if (!rdev->mode_info.mode_config_initialized)
9412 		return;
9413 
9414 	radeon_update_display_priority(rdev);
9415 
9416 	for (i = 0; i < rdev->num_crtc; i++) {
9417 		if (rdev->mode_info.crtcs[i]->base.enabled)
9418 			num_heads++;
9419 	}
9420 	for (i = 0; i < rdev->num_crtc; i++) {
9421 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9422 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9423 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9424 	}
9425 }
9426 
9427 /**
9428  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9429  *
9430  * @rdev: radeon_device pointer
9431  *
9432  * Fetches a GPU clock counter snapshot (CIK).
9433  * Returns the 64 bit clock counter snapshot.
9434  */
9435 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9436 {
9437 	uint64_t clock;
9438 
9439 	mutex_lock(&rdev->gpu_clock_mutex);
9440 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9441 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9442 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9443 	mutex_unlock(&rdev->gpu_clock_mutex);
9444 	return clock;
9445 }
9446 
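
/**
 * cik_set_uvd_clock - program a UVD clock post divider
 *
 * @rdev: radeon_device pointer
 * @clock: requested clock
 * @cntl_reg: clock control SMC register offset
 * @status_reg: clock status SMC register offset
 *
 * Look up the dividers for the requested clock in the atom tables,
 * program the post divider and wait for the status bit to be set.
 * Returns 0 for success, -ETIMEDOUT if the clock does not come up.
 */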
9447 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9448 			     u32 cntl_reg, u32 status_reg)
9449 {
9450 	int r, i;
9451 	struct atom_clock_dividers dividers;
9452 	uint32_t tmp;
9453 
9454 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9455 					   clock, false, &dividers);
9456 	if (r)
9457 		return r;
9458 
9459 	tmp = RREG32_SMC(cntl_reg);
9460 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9461 	tmp |= dividers.post_divider;
9462 	WREG32_SMC(cntl_reg, tmp);
9463 
9464 	for (i = 0; i < 100; i++) {
9465 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9466 			break;
9467 		mdelay(10);
9468 	}
9469 	if (i == 100)
9470 		return -ETIMEDOUT;
9471 
9472 	return 0;
9473 }
9474 
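/**
 * cik_set_uvd_clocks - set the UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested vclk
 * @dclk: requested dclk
 *
 * Set the UVD VCLK and DCLK via their SMC control registers.
 * Returns 0 for success, error for failure.
 */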
9475 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9476 {
9477 	int r = 0;
9478 
9479 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9480 	if (r)
9481 		return r;
9482 
9483 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9484 	return r;
9485 }
9486 
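/**
 * cik_set_vce_clocks - set the VCE clocks
 *
 * @rdev: radeon_device pointer
 * @evclk: requested evclk
 * @ecclk: requested ecclk
 *
 * Program the ECLK post divider from the atom dividers, waiting for
 * the clock status bit before and after the switch.
 * Returns 0 for success, -ETIMEDOUT if the clock does not come up.
 */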
9487 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9488 {
9489 	int r, i;
9490 	struct atom_clock_dividers dividers;
9491 	u32 tmp;
9492 
9493 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9494 					   ecclk, false, &dividers);
9495 	if (r)
9496 		return r;
9497 
9498 	for (i = 0; i < 100; i++) {
9499 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9500 			break;
9501 		mdelay(10);
9502 	}
9503 	if (i == 100)
9504 		return -ETIMEDOUT;
9505 
9506 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9507 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9508 	tmp |= dividers.post_divider;
9509 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9510 
9511 	for (i = 0; i < 100; i++) {
9512 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9513 			break;
9514 		mdelay(10);
9515 	}
9516 	if (i == 100)
9517 		return -ETIMEDOUT;
9518 
9519 	return 0;
9520 }
9521 
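/**
 * cik_pcie_gen3_enable - enable higher pcie link speeds
 *
 * @rdev: radeon_device pointer
 *
 * Retrain the pcie link to gen2/gen3 speeds if both the GPU and the
 * platform support them, redoing link equalization for gen3 where
 * necessary (CIK). Skipped on IGPs and when radeon.pcie_gen2=0.
 */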
9522 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9523 {
9524 	struct pci_dev *root = rdev->pdev->bus->self;
9525 	int bridge_pos, gpu_pos;
9526 	u32 speed_cntl, mask, current_data_rate;
9527 	int ret, i;
9528 	u16 tmp16;
9529 
9530 #if 0
9531 	if (pci_is_root_bus(rdev->pdev->bus))
9532 		return;
9533 #endif
9534 
9535 	if (radeon_pcie_gen2 == 0)
9536 		return;
9537 
9538 	if (rdev->flags & RADEON_IS_IGP)
9539 		return;
9540 
9541 	if (!(rdev->flags & RADEON_IS_PCIE))
9542 		return;
9543 
9544 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9545 	if (ret != 0)
9546 		return;
9547 
9548 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9549 		return;
9550 
9551 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9552 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9553 		LC_CURRENT_DATA_RATE_SHIFT;
9554 	if (mask & DRM_PCIE_SPEED_80) {
9555 		if (current_data_rate == 2) {
9556 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9557 			return;
9558 		}
9559 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9560 	} else if (mask & DRM_PCIE_SPEED_50) {
9561 		if (current_data_rate == 1) {
9562 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9563 			return;
9564 		}
9565 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9566 	}
9567 
9568 	bridge_pos = pci_pcie_cap(root);
9569 	if (!bridge_pos)
9570 		return;
9571 
9572 	gpu_pos = pci_pcie_cap(rdev->pdev);
9573 	if (!gpu_pos)
9574 		return;
9575 
9576 	if (mask & DRM_PCIE_SPEED_80) {
9577 		/* re-try equalization if gen3 is not already enabled */
9578 		if (current_data_rate != 2) {
9579 			u16 bridge_cfg, gpu_cfg;
9580 			u16 bridge_cfg2, gpu_cfg2;
9581 			u32 max_lw, current_lw, tmp;
9582 
9583 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9584 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9585 
9586 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9587 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9588 
9589 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9590 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9591 
9592 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9593 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9594 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9595 
9596 			if (current_lw < max_lw) {
9597 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9598 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9599 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9600 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9601 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9602 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9603 				}
9604 			}
9605 
9606 			for (i = 0; i < 10; i++) {
9607 				/* check status */
9608 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9609 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9610 					break;
9611 
9612 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9613 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9614 
9615 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9616 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9617 
9618 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9619 				tmp |= LC_SET_QUIESCE;
9620 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9621 
9622 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9623 				tmp |= LC_REDO_EQ;
9624 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9625 
9626 				mdelay(100);
9627 
9628 				/* linkctl */
9629 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9630 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9631 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9632 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9633 
9634 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9635 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9636 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9637 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9638 
9639 				/* linkctl2 */
9640 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9641 				tmp16 &= ~((1 << 4) | (7 << 9));
9642 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9643 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9644 
9645 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9646 				tmp16 &= ~((1 << 4) | (7 << 9));
9647 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9648 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9649 
9650 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9651 				tmp &= ~LC_SET_QUIESCE;
9652 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9653 			}
9654 		}
9655 	}
9656 
9657 	/* set the link speed */
9658 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9659 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9660 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9661 
9662 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9663 	tmp16 &= ~0xf;
9664 	if (mask & DRM_PCIE_SPEED_80)
9665 		tmp16 |= 3; /* gen3 */
9666 	else if (mask & DRM_PCIE_SPEED_50)
9667 		tmp16 |= 2; /* gen2 */
9668 	else
9669 		tmp16 |= 1; /* gen1 */
9670 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9671 
9672 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9673 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9674 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9675 
9676 	for (i = 0; i < rdev->usec_timeout; i++) {
9677 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9678 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9679 			break;
9680 		udelay(1);
9681 	}
9682 }
9683 
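/**
 * cik_program_aspm - program ASPM settings
 *
 * @rdev: radeon_device pointer
 *
 * Configure pcie active state power management: the L0s/L1
 * inactivity timers and PLL powerdown behaviour in L1 (CIK).
 * Skipped on IGPs and when radeon.aspm=0.
 */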
9684 static void cik_program_aspm(struct radeon_device *rdev)
9685 {
9686 	u32 data, orig;
9687 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9688 #if 0
9689 	bool disable_clkreq = false;
9690 #endif
9691 
9692 	if (radeon_aspm == 0)
9693 		return;
9694 
9695 	/* XXX double check IGPs */
9696 	if (rdev->flags & RADEON_IS_IGP)
9697 		return;
9698 
9699 	if (!(rdev->flags & RADEON_IS_PCIE))
9700 		return;
9701 
9702 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9703 	data &= ~LC_XMIT_N_FTS_MASK;
9704 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9705 	if (orig != data)
9706 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9707 
9708 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9709 	data |= LC_GO_TO_RECOVERY;
9710 	if (orig != data)
9711 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9712 
9713 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9714 	data |= P_IGNORE_EDB_ERR;
9715 	if (orig != data)
9716 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9717 
9718 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9719 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9720 	data |= LC_PMI_TO_L1_DIS;
9721 	if (!disable_l0s)
9722 		data |= LC_L0S_INACTIVITY(7);
9723 
9724 	if (!disable_l1) {
9725 		data |= LC_L1_INACTIVITY(7);
9726 		data &= ~LC_PMI_TO_L1_DIS;
9727 		if (orig != data)
9728 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9729 
9730 		if (!disable_plloff_in_l1) {
9731 			bool clk_req_support;
9732 
9733 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9734 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9735 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9736 			if (orig != data)
9737 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9738 
9739 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9740 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9741 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9742 			if (orig != data)
9743 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9744 
9745 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9746 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9747 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9748 			if (orig != data)
9749 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9750 
9751 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9752 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9753 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9754 			if (orig != data)
9755 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9756 
9757 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9758 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9759 			data |= LC_DYN_LANES_PWR_STATE(3);
9760 			if (orig != data)
9761 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9762 
9763 #ifdef zMN_TODO
9764 			if (!disable_clkreq &&
9765 			    !pci_is_root_bus(rdev->pdev->bus)) {
9766 				struct pci_dev *root = rdev->pdev->bus->self;
9767 				u32 lnkcap;
9768 
9769 				clk_req_support = false;
9770 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9771 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9772 					clk_req_support = true;
9773 			} else {
9774 				clk_req_support = false;
9775 			}
9776 #else
9777 			clk_req_support = false;
9778 #endif
9779 
9780 			if (clk_req_support) {
9781 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9782 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9783 				if (orig != data)
9784 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9785 
9786 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9787 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9788 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9789 				if (orig != data)
9790 					WREG32_SMC(THM_CLK_CNTL, data);
9791 
9792 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9793 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9794 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9795 				if (orig != data)
9796 					WREG32_SMC(MISC_CLK_CTRL, data);
9797 
9798 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9799 				data &= ~BCLK_AS_XCLK;
9800 				if (orig != data)
9801 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9802 
9803 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9804 				data &= ~FORCE_BIF_REFCLK_EN;
9805 				if (orig != data)
9806 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9807 
9808 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9809 				data &= ~MPLL_CLKOUT_SEL_MASK;
9810 				data |= MPLL_CLKOUT_SEL(4);
9811 				if (orig != data)
9812 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9813 			}
9814 		}
9815 	} else {
9816 		if (orig != data)
9817 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9818 	}
9819 
9820 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9821 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9822 	if (orig != data)
9823 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9824 
9825 	if (!disable_l0s) {
9826 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9827 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9828 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9829 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9830 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9831 				data &= ~LC_L0S_INACTIVITY_MASK;
9832 				if (orig != data)
9833 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9834 			}
9835 		}
9836 	}
9837 }
9838