xref: /dragonfly/sys/dev/drm/radeon/cik.c (revision 4ad7b37a)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include "cikd.h"
30 #include "atom.h"
31 #include "cik_blit_shaders.h"
32 #include "radeon_ucode.h"
33 #include "clearstate_ci.h"
34 
35 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
36 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 
45 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
46 MODULE_FIRMWARE("radeon/bonaire_me.bin");
47 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
48 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
49 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
50 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
51 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
52 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
53 
54 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
55 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
56 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
57 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
63 
64 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
65 MODULE_FIRMWARE("radeon/hawaii_me.bin");
66 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
67 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
68 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
69 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
70 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
71 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
72 
73 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
74 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
75 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
76 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
79 
80 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
81 MODULE_FIRMWARE("radeon/kaveri_me.bin");
82 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
83 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
84 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
85 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
86 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
87 
88 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
89 MODULE_FIRMWARE("radeon/KABINI_me.bin");
90 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
91 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
92 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
93 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
94 
95 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
96 MODULE_FIRMWARE("radeon/kabini_me.bin");
97 MODULE_FIRMWARE("radeon/kabini_ce.bin");
98 MODULE_FIRMWARE("radeon/kabini_mec.bin");
99 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
100 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
101 
102 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
103 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
104 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
105 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
108 
109 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
110 MODULE_FIRMWARE("radeon/mullins_me.bin");
111 MODULE_FIRMWARE("radeon/mullins_ce.bin");
112 MODULE_FIRMWARE("radeon/mullins_mec.bin");
113 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
114 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
115 
116 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
117 static void cik_rlc_stop(struct radeon_device *rdev);
118 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
119 static void cik_program_aspm(struct radeon_device *rdev);
120 static void cik_init_pg(struct radeon_device *rdev);
121 static void cik_init_cg(struct radeon_device *rdev);
122 static void cik_fini_pg(struct radeon_device *rdev);
123 static void cik_fini_cg(struct radeon_device *rdev);
124 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
125 					  bool enable);
126 
127 /* get temperature in millidegrees */
128 int ci_get_temp(struct radeon_device *rdev)
129 {
130 	u32 temp;
131 	int actual_temp = 0;
132 
133 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
134 		CTF_TEMP_SHIFT;
135 
136 	if (temp & 0x200)
137 		actual_temp = 255;
138 	else
139 		actual_temp = temp & 0x1ff;
140 
141 	actual_temp = actual_temp * 1000;
142 
143 	return actual_temp;
144 }
145 
146 /* get temperature in millidegrees */
147 int kv_get_temp(struct radeon_device *rdev)
148 {
149 	u32 temp;
150 	int actual_temp = 0;
151 
152 	temp = RREG32_SMC(0xC0300E0C);
153 
154 	if (temp)
155 		actual_temp = (temp / 8) - 49;
156 	else
157 		actual_temp = 0;
158 
159 	actual_temp = actual_temp * 1000;
160 
161 	return actual_temp;
162 }
163 
/*
 * Indirect registers accessor
 *
 * cik_pciep_rreg - read a PCIE port indirect register.
 * The register is selected by writing its offset to PCIE_INDEX and the
 * value is then fetched from PCIE_DATA; the index/data pair is protected
 * by pciep_idx_lock so concurrent accessors cannot interleave.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	spin_lock(&rdev->pciep_idx_lock);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	r = RREG32(PCIE_DATA);
	spin_unlock(&rdev->pciep_idx_lock);
	return r;
}
178 
/*
 * cik_pciep_wreg - write a PCIE port indirect register.
 * Selects the register via PCIE_INDEX, writes the value through
 * PCIE_DATA, and reads both back to flush the posted writes.  Serialized
 * against cik_pciep_rreg() by pciep_idx_lock.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	spin_lock(&rdev->pciep_idx_lock);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* read back to post the data write */
	spin_unlock(&rdev->pciep_idx_lock);
}
188 
/*
 * RLC save/restore register list for Spectre (Kaveri) GFX7 parts.
 *
 * NOTE(review): layout inferred from the data shape — confirm against the
 * RLC save/restore list consumer.  Each entry appears to be
 *   (GRBM_GFX_INDEX broadcast/instance select << 16) | (reg byte offset >> 2)
 * followed by a 0x00000000 placeholder word; the bare count words (0x3,
 * 0x5 below) seem to introduce sections consumed differently.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* section marker — see note above */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* section marker — following 5 entries have no placeholder */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
635 
/*
 * RLC save/restore register list for Kalindi (Kabini/Mullins) GFX7 parts.
 *
 * NOTE(review): same format as spectre_rlc_save_restore_register_list —
 * layout inferred from the data shape; confirm against the RLC
 * save/restore list consumer.  Each entry appears to be
 *   (GRBM_GFX_INDEX broadcast/instance select << 16) | (reg byte offset >> 2)
 * followed by a 0x00000000 placeholder word; the bare count words (0x3,
 * 0x5 below) seem to introduce sections consumed differently.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* section marker — see note above */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* section marker — following 5 entries have no placeholder */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
960 
/*
 * Bonaire "golden" SPM register settings.
 * NOTE(review): entries appear to be {offset, mask, value} triples as
 * consumed by the golden-register programming helper — confirm there.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
965 
/*
 * Bonaire "golden" common register settings.
 * NOTE(review): entries appear to be {offset, mask, value} triples as
 * consumed by the golden-register programming helper — confirm there.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
973 
/*
 * Bonaire "golden" register settings.
 * NOTE(review): entries appear to be {offset, mask, value} triples as
 * consumed by the golden-register programming helper — confirm there.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1018 
/*
 * Bonaire medium-grain / coarse-grain clockgating init settings.
 * NOTE(review): entries appear to be {offset, mask, value} triples as
 * consumed by the golden-register programming helper — confirm there.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1104 
/* SPM golden settings for Spectre (Kaveri); rows appear to be
 * {offset, mask, value} triplets consumed by
 * radeon_program_register_sequence() in cik_init_golden_registers(). */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1109 
/* Common golden settings for Spectre (Kaveri); {offset, mask, value}
 * triplets applied in cik_init_golden_registers(). */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1117 
/* Golden register settings for Spectre (Kaveri, CHIP_KAVERI);
 * {offset, mask, value} triplets applied in cik_init_golden_registers(). */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1146 
/* MGCG/CGCG (clock gating) init sequence for Spectre (Kaveri);
 * {offset, mask, value} triplets applied first for CHIP_KAVERI in
 * cik_init_golden_registers(). */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1237 
/* SPM golden settings for Kalindi (Kabini, also reused for Mullins);
 * {offset, mask, value} triplets applied in cik_init_golden_registers(). */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1242 
/* Common golden settings for Kalindi (Kabini, also reused for Mullins);
 * {offset, mask, value} triplets applied in cik_init_golden_registers(). */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1250 
/* Golden register settings for Kalindi (Kabini, CHIP_KABINI);
 * {offset, mask, value} triplets applied in cik_init_golden_registers(). */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1284 
/* MGCG/CGCG (clock gating) init sequence for Kalindi (Kabini, also
 * reused for Mullins); {offset, mask, value} triplets applied first
 * for CHIP_KABINI/CHIP_MULLINS in cik_init_golden_registers(). */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1343 
/* SPM golden settings for Hawaii; {offset, mask, value} triplets
 * applied in cik_init_golden_registers(). */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1348 
/* Common golden settings for Hawaii; {offset, mask, value} triplets
 * applied in cik_init_golden_registers(). */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1357 
/* Golden register settings for Hawaii (CHIP_HAWAII);
 * {offset, mask, value} triplets applied in cik_init_golden_registers(). */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1397 
/* MGCG/CGCG (clock gating) init sequence for Hawaii;
 * {offset, mask, value} triplets applied first for CHIP_HAWAII in
 * cik_init_golden_registers(). */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1508 
/* Golden register settings for Godavari (Mullins, CHIP_MULLINS);
 * {offset, mask, value} triplets applied in cik_init_golden_registers().
 * NOTE(review): the 0x98302 offset below is not 4-byte aligned and looks
 * like a typo for 0x9834 (cf. the sibling tables) — it matches upstream
 * Linux, so it is left as-is; verify against upstream before changing. */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1544 
1545 
1546 static void cik_init_golden_registers(struct radeon_device *rdev)
1547 {
1548 	switch (rdev->family) {
1549 	case CHIP_BONAIRE:
1550 		radeon_program_register_sequence(rdev,
1551 						 bonaire_mgcg_cgcg_init,
1552 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1553 		radeon_program_register_sequence(rdev,
1554 						 bonaire_golden_registers,
1555 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1556 		radeon_program_register_sequence(rdev,
1557 						 bonaire_golden_common_registers,
1558 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1559 		radeon_program_register_sequence(rdev,
1560 						 bonaire_golden_spm_registers,
1561 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1562 		break;
1563 	case CHIP_KABINI:
1564 		radeon_program_register_sequence(rdev,
1565 						 kalindi_mgcg_cgcg_init,
1566 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1567 		radeon_program_register_sequence(rdev,
1568 						 kalindi_golden_registers,
1569 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1570 		radeon_program_register_sequence(rdev,
1571 						 kalindi_golden_common_registers,
1572 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1573 		radeon_program_register_sequence(rdev,
1574 						 kalindi_golden_spm_registers,
1575 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1576 		break;
1577 	case CHIP_MULLINS:
1578 		radeon_program_register_sequence(rdev,
1579 						 kalindi_mgcg_cgcg_init,
1580 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1581 		radeon_program_register_sequence(rdev,
1582 						 godavari_golden_registers,
1583 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1584 		radeon_program_register_sequence(rdev,
1585 						 kalindi_golden_common_registers,
1586 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1587 		radeon_program_register_sequence(rdev,
1588 						 kalindi_golden_spm_registers,
1589 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1590 		break;
1591 	case CHIP_KAVERI:
1592 		radeon_program_register_sequence(rdev,
1593 						 spectre_mgcg_cgcg_init,
1594 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1595 		radeon_program_register_sequence(rdev,
1596 						 spectre_golden_registers,
1597 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1598 		radeon_program_register_sequence(rdev,
1599 						 spectre_golden_common_registers,
1600 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1601 		radeon_program_register_sequence(rdev,
1602 						 spectre_golden_spm_registers,
1603 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1604 		break;
1605 	case CHIP_HAWAII:
1606 		radeon_program_register_sequence(rdev,
1607 						 hawaii_mgcg_cgcg_init,
1608 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1609 		radeon_program_register_sequence(rdev,
1610 						 hawaii_golden_registers,
1611 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1612 		radeon_program_register_sequence(rdev,
1613 						 hawaii_golden_common_registers,
1614 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1615 		radeon_program_register_sequence(rdev,
1616 						 hawaii_golden_spm_registers,
1617 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1618 		break;
1619 	default:
1620 		break;
1621 	}
1622 }
1623 
1624 /**
1625  * cik_get_xclk - get the xclk
1626  *
1627  * @rdev: radeon_device pointer
1628  *
1629  * Returns the reference clock used by the gfx engine
1630  * (CIK).
1631  */
1632 u32 cik_get_xclk(struct radeon_device *rdev)
1633 {
1634         u32 reference_clock = rdev->clock.spll.reference_freq;
1635 
1636 	if (rdev->flags & RADEON_IS_IGP) {
1637 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1638 			return reference_clock / 2;
1639 	} else {
1640 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1641 			return reference_clock / 4;
1642 	}
1643 	return reference_clock;
1644 }
1645 
1646 /**
1647  * cik_mm_rdoorbell - read a doorbell dword
1648  *
1649  * @rdev: radeon_device pointer
1650  * @index: doorbell index
1651  *
1652  * Returns the value in the doorbell aperture at the
1653  * requested doorbell index (CIK).
1654  */
1655 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1656 {
1657 	if (index < rdev->doorbell.num_doorbells) {
1658 		return readl(rdev->doorbell.ptr + index);
1659 	} else {
1660 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1661 		return 0;
1662 	}
1663 }
1664 
1665 /**
1666  * cik_mm_wdoorbell - write a doorbell dword
1667  *
1668  * @rdev: radeon_device pointer
1669  * @index: doorbell index
1670  * @v: value to write
1671  *
1672  * Writes @v to the doorbell aperture at the
1673  * requested doorbell index (CIK).
1674  */
1675 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1676 {
1677 	if (index < rdev->doorbell.num_doorbells) {
1678 		writel(v, rdev->doorbell.ptr + index);
1679 	} else {
1680 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1681 	}
1682 }
1683 
#define BONAIRE_IO_MC_REGS_SIZE 36

/* MC SEQ IO debug {index, data} pairs for Bonaire; written to
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * when the legacy (non-new_fw) MC firmware image is used. */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1725 
#define HAWAII_IO_MC_REGS_SIZE 22

/* MC SEQ IO debug {index, data} pairs for Hawaii; written to
 * MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * when the legacy (non-new_fw) MC firmware image is used. */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1753 
1754 
1755 /**
1756  * cik_srbm_select - select specific register instances
1757  *
1758  * @rdev: radeon_device pointer
1759  * @me: selected ME (micro engine)
1760  * @pipe: pipe
1761  * @queue: queue
1762  * @vmid: VMID
1763  *
1764  * Switches the currently active registers instances.  Some
1765  * registers are instanced per VMID, others are instanced per
1766  * me/pipe/queue combination.
1767  */
1768 static void cik_srbm_select(struct radeon_device *rdev,
1769 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1770 {
1771 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1772 			     MEID(me & 0x3) |
1773 			     VMID(vmid & 0xf) |
1774 			     QUEUEID(queue & 0x7));
1775 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1776 }
1777 
1778 /* ucode loading */
1779 /**
1780  * ci_mc_load_microcode - load MC ucode into the hw
1781  *
1782  * @rdev: radeon_device pointer
1783  *
1784  * Load the GDDR MC ucode into the hw (CIK).
1785  * Returns 0 on success, error on failure.
1786  */
1787 int ci_mc_load_microcode(struct radeon_device *rdev)
1788 {
1789 	const __be32 *fw_data = NULL;
1790 	const __le32 *new_fw_data = NULL;
1791 	u32 running, blackout = 0;
1792 	u32 *io_mc_regs = NULL;
1793 	const __le32 *new_io_mc_regs = NULL;
1794 	int i, regs_size, ucode_size;
1795 
1796 	if (!rdev->mc_fw)
1797 		return -EINVAL;
1798 
1799 	if (rdev->new_fw) {
1800 		const struct mc_firmware_header_v1_0 *hdr =
1801 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1802 
1803 		radeon_ucode_print_mc_hdr(&hdr->header);
1804 
1805 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1806 		new_io_mc_regs = (const __le32 *)
1807 			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1808 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1809 		new_fw_data = (const __le32 *)
1810 			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1811 	} else {
1812 		ucode_size = rdev->mc_fw->datasize / 4;
1813 
1814 		switch (rdev->family) {
1815 		case CHIP_BONAIRE:
1816 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1817 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1818 			break;
1819 		case CHIP_HAWAII:
1820 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1821 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1822 			break;
1823 		default:
1824 			return -EINVAL;
1825 		}
1826 		fw_data = (const __be32 *)rdev->mc_fw->data;
1827 	}
1828 
1829 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1830 
1831 	if (running == 0) {
1832 		if (running) {
1833 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1834 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1835 		}
1836 
1837 		/* reset the engine and set to writable */
1838 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1839 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1840 
1841 		/* load mc io regs */
1842 		for (i = 0; i < regs_size; i++) {
1843 			if (rdev->new_fw) {
1844 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1845 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1846 			} else {
1847 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1848 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1849 			}
1850 		}
1851 		/* load the MC ucode */
1852 		for (i = 0; i < ucode_size; i++) {
1853 			if (rdev->new_fw)
1854 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1855 			else
1856 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1857 		}
1858 
1859 		/* put the engine back into the active state */
1860 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1861 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1862 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1863 
1864 		/* wait for training to complete */
1865 		for (i = 0; i < rdev->usec_timeout; i++) {
1866 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1867 				break;
1868 			udelay(1);
1869 		}
1870 		for (i = 0; i < rdev->usec_timeout; i++) {
1871 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1872 				break;
1873 			udelay(1);
1874 		}
1875 
1876 		if (running)
1877 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1878 	}
1879 
1880 	return 0;
1881 }
1882 
1883 /**
1884  * cik_init_microcode - load ucode images from disk
1885  *
1886  * @rdev: radeon_device pointer
1887  *
1888  * Use the firmware interface to load the ucode images into
1889  * the driver (not loaded into hw).
1890  * Returns 0 on success, error on failure.
1891  */
1892 static int cik_init_microcode(struct radeon_device *rdev)
1893 {
1894 	const char *chip_name;
1895 	const char *new_chip_name;
1896 	size_t pfp_req_size, me_req_size, ce_req_size,
1897 		mec_req_size, rlc_req_size, mc_req_size = 0,
1898 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1899 	char fw_name[30];
1900 	int new_fw = 0;
1901 	int err;
1902 	int num_fw;
1903 
1904 	DRM_DEBUG("\n");
1905 
1906 	switch (rdev->family) {
1907 	case CHIP_BONAIRE:
1908 		chip_name = "BONAIRE";
1909 		new_chip_name = "bonaire";
1910 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1911 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1912 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1913 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1914 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1915 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1916 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1917 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1918 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1919 		num_fw = 8;
1920 		break;
1921 	case CHIP_HAWAII:
1922 		chip_name = "HAWAII";
1923 		new_chip_name = "hawaii";
1924 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1925 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1926 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1927 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1928 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1929 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1930 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1931 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1932 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1933 		num_fw = 8;
1934 		break;
1935 	case CHIP_KAVERI:
1936 		chip_name = "KAVERI";
1937 		new_chip_name = "kaveri";
1938 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1939 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1940 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1941 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1942 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1943 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1944 		num_fw = 7;
1945 		break;
1946 	case CHIP_KABINI:
1947 		chip_name = "KABINI";
1948 		new_chip_name = "kabini";
1949 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1950 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1951 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1952 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1953 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1954 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1955 		num_fw = 6;
1956 		break;
1957 	case CHIP_MULLINS:
1958 		chip_name = "MULLINS";
1959 		new_chip_name = "mullins";
1960 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1961 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1962 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1963 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1964 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1965 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1966 		num_fw = 6;
1967 		break;
1968 	default: BUG();
1969 	}
1970 
1971 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1972 
1973 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
1974 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1975 	if (err) {
1976 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1977 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1978 		if (err)
1979 			goto out;
1980 		if (rdev->pfp_fw->datasize != pfp_req_size) {
1981 			printk(KERN_ERR
1982 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1983 			       rdev->pfp_fw->datasize, fw_name);
1984 			err = -EINVAL;
1985 			goto out;
1986 		}
1987 	} else {
1988 		err = radeon_ucode_validate(rdev->pfp_fw);
1989 		if (err) {
1990 			printk(KERN_ERR
1991 			       "cik_fw: validation failed for firmware \"%s\"\n",
1992 			       fw_name);
1993 			goto out;
1994 		} else {
1995 			new_fw++;
1996 		}
1997 	}
1998 
1999 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
2000 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2001 	if (err) {
2002 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
2003 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2004 		if (err)
2005 			goto out;
2006 		if (rdev->me_fw->datasize != me_req_size) {
2007 			printk(KERN_ERR
2008 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2009 			       rdev->me_fw->datasize, fw_name);
2010 			err = -EINVAL;
2011 		}
2012 	} else {
2013 		err = radeon_ucode_validate(rdev->me_fw);
2014 		if (err) {
2015 			printk(KERN_ERR
2016 			       "cik_fw: validation failed for firmware \"%s\"\n",
2017 			       fw_name);
2018 			goto out;
2019 		} else {
2020 			new_fw++;
2021 		}
2022 	}
2023 
2024 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
2025 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2026 	if (err) {
2027 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
2028 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2029 		if (err)
2030 			goto out;
2031 		if (rdev->ce_fw->datasize != ce_req_size) {
2032 			printk(KERN_ERR
2033 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2034 			       rdev->ce_fw->datasize, fw_name);
2035 			err = -EINVAL;
2036 		}
2037 	} else {
2038 		err = radeon_ucode_validate(rdev->ce_fw);
2039 		if (err) {
2040 			printk(KERN_ERR
2041 			       "cik_fw: validation failed for firmware \"%s\"\n",
2042 			       fw_name);
2043 			goto out;
2044 		} else {
2045 			new_fw++;
2046 		}
2047 	}
2048 
2049 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", new_chip_name);
2050 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2051 	if (err) {
2052 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
2053 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2054 		if (err)
2055 			goto out;
2056 		if (rdev->mec_fw->datasize != mec_req_size) {
2057 			printk(KERN_ERR
2058 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2059 			       rdev->mec_fw->datasize, fw_name);
2060 			err = -EINVAL;
2061 		}
2062 	} else {
2063 		err = radeon_ucode_validate(rdev->mec_fw);
2064 		if (err) {
2065 			printk(KERN_ERR
2066 			       "cik_fw: validation failed for firmware \"%s\"\n",
2067 			       fw_name);
2068 			goto out;
2069 		} else {
2070 			new_fw++;
2071 		}
2072 	}
2073 
2074 	if (rdev->family == CHIP_KAVERI) {
2075 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec2", new_chip_name);
2076 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2077 		if (err) {
2078 			goto out;
2079 		} else {
2080 			err = radeon_ucode_validate(rdev->mec2_fw);
2081 			if (err) {
2082 				goto out;
2083 			} else {
2084 				new_fw++;
2085 			}
2086 		}
2087 	}
2088 
2089 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
2090 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2091 	if (err) {
2092 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
2093 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2094 		if (err)
2095 			goto out;
2096 		if (rdev->rlc_fw->datasize != rlc_req_size) {
2097 			printk(KERN_ERR
2098 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2099 			       rdev->rlc_fw->datasize, fw_name);
2100 			err = -EINVAL;
2101 		}
2102 	} else {
2103 		err = radeon_ucode_validate(rdev->rlc_fw);
2104 		if (err) {
2105 			printk(KERN_ERR
2106 			       "cik_fw: validation failed for firmware \"%s\"\n",
2107 			       fw_name);
2108 			goto out;
2109 		} else {
2110 			new_fw++;
2111 		}
2112 	}
2113 
2114 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", new_chip_name);
2115 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2116 	if (err) {
2117 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
2118 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2119 		if (err)
2120 			goto out;
2121 		if (rdev->sdma_fw->datasize != sdma_req_size) {
2122 			printk(KERN_ERR
2123 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2124 			       rdev->sdma_fw->datasize, fw_name);
2125 			err = -EINVAL;
2126 		}
2127 	} else {
2128 		err = radeon_ucode_validate(rdev->sdma_fw);
2129 		if (err) {
2130 			printk(KERN_ERR
2131 			       "cik_fw: validation failed for firmware \"%s\"\n",
2132 			       fw_name);
2133 			goto out;
2134 		} else {
2135 			new_fw++;
2136 		}
2137 	}
2138 
2139 	/* No SMC, MC ucode on APUs */
2140 	if (!(rdev->flags & RADEON_IS_IGP)) {
2141 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
2142 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2143 		if (err) {
2144 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
2145 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2146 			if (err) {
2147 				ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
2148 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2149 				if (err)
2150 					goto out;
2151 			}
2152 			if ((rdev->mc_fw->datasize != mc_req_size) &&
2153 			    (rdev->mc_fw->datasize != mc2_req_size)){
2154 				printk(KERN_ERR
2155 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2156 				       rdev->mc_fw->datasize, fw_name);
2157 				err = -EINVAL;
2158 			}
2159 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
2160 		} else {
2161 			err = radeon_ucode_validate(rdev->mc_fw);
2162 			if (err) {
2163 				printk(KERN_ERR
2164 				       "cik_fw: validation failed for firmware \"%s\"\n",
2165 				       fw_name);
2166 				goto out;
2167 			} else {
2168 				new_fw++;
2169 			}
2170 		}
2171 
2172 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
2173 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2174 		if (err) {
2175 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
2176 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2177 			if (err) {
2178 				printk(KERN_ERR
2179 				       "smc: error loading firmware \"%s\"\n",
2180 				       fw_name);
2181 				release_firmware(rdev->smc_fw);
2182 				rdev->smc_fw = NULL;
2183 				err = 0;
2184 			} else if (rdev->smc_fw->datasize != smc_req_size) {
2185 				printk(KERN_ERR
2186 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2187 				       rdev->smc_fw->datasize, fw_name);
2188 				err = -EINVAL;
2189 			}
2190 		} else {
2191 			err = radeon_ucode_validate(rdev->smc_fw);
2192 			if (err) {
2193 				printk(KERN_ERR
2194 				       "cik_fw: validation failed for firmware \"%s\"\n",
2195 				       fw_name);
2196 				goto out;
2197 			} else {
2198 				new_fw++;
2199 			}
2200 		}
2201 	}
2202 
2203 	if (new_fw == 0) {
2204 		rdev->new_fw = false;
2205 	} else if (new_fw < num_fw) {
2206 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2207 		err = -EINVAL;
2208 	} else {
2209 		rdev->new_fw = true;
2210 	}
2211 
2212 out:
2213 	if (err) {
2214 		if (err != -EINVAL)
2215 			printk(KERN_ERR
2216 			       "cik_cp: Failed to load firmware \"%s\"\n",
2217 			       fw_name);
2218 		release_firmware(rdev->pfp_fw);
2219 		rdev->pfp_fw = NULL;
2220 		release_firmware(rdev->me_fw);
2221 		rdev->me_fw = NULL;
2222 		release_firmware(rdev->ce_fw);
2223 		rdev->ce_fw = NULL;
2224 		release_firmware(rdev->mec_fw);
2225 		rdev->mec_fw = NULL;
2226 		release_firmware(rdev->mec2_fw);
2227 		rdev->mec2_fw = NULL;
2228 		release_firmware(rdev->rlc_fw);
2229 		rdev->rlc_fw = NULL;
2230 		release_firmware(rdev->sdma_fw);
2231 		rdev->sdma_fw = NULL;
2232 		release_firmware(rdev->mc_fw);
2233 		rdev->mc_fw = NULL;
2234 		release_firmware(rdev->smc_fw);
2235 		rdev->smc_fw = NULL;
2236 	}
2237 	return err;
2238 }
2239 
2240 /**
2241  * cik_fini_microcode - drop the firmwares image references
2242  *
2243  * @rdev: radeon_device pointer
2244  *
2245  * Drop the pfp, me, ce, mec, mec2, rlc, sdma, mc and smc firmware image references.
2246  * Called at driver shutdown.
2247  */
2248 static void cik_fini_microcode(struct radeon_device *rdev)
2249 {
2250 	release_firmware(rdev->pfp_fw);
2251 	rdev->pfp_fw = NULL;
2252 	release_firmware(rdev->me_fw);
2253 	rdev->me_fw = NULL;
2254 	release_firmware(rdev->ce_fw);
2255 	rdev->ce_fw = NULL;
2256 	release_firmware(rdev->mec_fw);
2257 	rdev->mec_fw = NULL;
2258 	release_firmware(rdev->mec2_fw);
2259 	rdev->mec2_fw = NULL;
2260 	release_firmware(rdev->rlc_fw);
2261 	rdev->rlc_fw = NULL;
2262 	release_firmware(rdev->sdma_fw);
2263 	rdev->sdma_fw = NULL;
2264 	release_firmware(rdev->mc_fw);
2265 	rdev->mc_fw = NULL;
2266 	release_firmware(rdev->smc_fw);
2267 	rdev->smc_fw = NULL;
2268 }
2269 
2270 /*
2271  * Core functions
2272  */
2273 /**
2274  * cik_tiling_mode_table_init - init the hw tiling table
2275  *
2276  * @rdev: radeon_device pointer
2277  *
2278  * Starting with SI, the tiling setup is done globally in a
2279  * set of 32 tiling modes.  Rather than selecting each set of
2280  * parameters per surface as on older asics, we just select
2281  * which index in the tiling table we want to use, and the
2282  * surface uses those parameters (CIK).
2283  */
2284 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2285 {
2286 	const u32 num_tile_mode_states = 32;
2287 	const u32 num_secondary_tile_mode_states = 16;
2288 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2289 	u32 num_pipe_configs;
2290 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2291 		rdev->config.cik.max_shader_engines;
2292 
2293 	switch (rdev->config.cik.mem_row_size_in_kb) {
2294 	case 1:
2295 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2296 		break;
2297 	case 2:
2298 	default:
2299 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2300 		break;
2301 	case 4:
2302 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2303 		break;
2304 	}
2305 
2306 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2307 	if (num_pipe_configs > 8)
2308 		num_pipe_configs = 16;
2309 
2310 	if (num_pipe_configs == 16) {
2311 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2312 			switch (reg_offset) {
2313 			case 0:
2314 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2316 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2318 				break;
2319 			case 1:
2320 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2322 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2324 				break;
2325 			case 2:
2326 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2328 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2330 				break;
2331 			case 3:
2332 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2333 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2334 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2335 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2336 				break;
2337 			case 4:
2338 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2339 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2340 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341 						 TILE_SPLIT(split_equal_to_row_size));
2342 				break;
2343 			case 5:
2344 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2345 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347 				break;
2348 			case 6:
2349 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2350 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2351 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2352 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2353 				break;
2354 			case 7:
2355 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2356 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2357 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358 						 TILE_SPLIT(split_equal_to_row_size));
2359 				break;
2360 			case 8:
2361 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2362 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2363 				break;
2364 			case 9:
2365 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2366 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2368 				break;
2369 			case 10:
2370 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2372 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2374 				break;
2375 			case 11:
2376 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2377 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2378 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2379 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2380 				break;
2381 			case 12:
2382 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2383 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2384 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2386 				break;
2387 			case 13:
2388 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2389 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2391 				break;
2392 			case 14:
2393 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2395 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2397 				break;
2398 			case 16:
2399 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2400 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2401 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2402 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2403 				break;
2404 			case 17:
2405 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2406 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2407 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2409 				break;
2410 			case 27:
2411 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2412 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2414 				break;
2415 			case 28:
2416 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2418 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420 				break;
2421 			case 29:
2422 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2423 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2424 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2425 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2426 				break;
2427 			case 30:
2428 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2429 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2430 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432 				break;
2433 			default:
2434 				gb_tile_moden = 0;
2435 				break;
2436 			}
2437 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2438 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2439 		}
2440 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2441 			switch (reg_offset) {
2442 			case 0:
2443 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2444 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2445 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2446 						 NUM_BANKS(ADDR_SURF_16_BANK));
2447 				break;
2448 			case 1:
2449 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452 						 NUM_BANKS(ADDR_SURF_16_BANK));
2453 				break;
2454 			case 2:
2455 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2456 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2457 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2458 						 NUM_BANKS(ADDR_SURF_16_BANK));
2459 				break;
2460 			case 3:
2461 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464 						 NUM_BANKS(ADDR_SURF_16_BANK));
2465 				break;
2466 			case 4:
2467 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2469 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2470 						 NUM_BANKS(ADDR_SURF_8_BANK));
2471 				break;
2472 			case 5:
2473 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476 						 NUM_BANKS(ADDR_SURF_4_BANK));
2477 				break;
2478 			case 6:
2479 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2481 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2482 						 NUM_BANKS(ADDR_SURF_2_BANK));
2483 				break;
2484 			case 8:
2485 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2487 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2488 						 NUM_BANKS(ADDR_SURF_16_BANK));
2489 				break;
2490 			case 9:
2491 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2493 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2494 						 NUM_BANKS(ADDR_SURF_16_BANK));
2495 				break;
2496 			case 10:
2497 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500 						 NUM_BANKS(ADDR_SURF_16_BANK));
2501 				break;
2502 			case 11:
2503 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2505 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2506 						 NUM_BANKS(ADDR_SURF_8_BANK));
2507 				break;
2508 			case 12:
2509 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2511 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2512 						 NUM_BANKS(ADDR_SURF_4_BANK));
2513 				break;
2514 			case 13:
2515 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2518 						 NUM_BANKS(ADDR_SURF_2_BANK));
2519 				break;
2520 			case 14:
2521 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524 						 NUM_BANKS(ADDR_SURF_2_BANK));
2525 				break;
2526 			default:
2527 				gb_tile_moden = 0;
2528 				break;
2529 			}
2530 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2531 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2532 		}
2533 	} else if (num_pipe_configs == 8) {
2534 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2535 			switch (reg_offset) {
2536 			case 0:
2537 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2538 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2539 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2541 				break;
2542 			case 1:
2543 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2544 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2545 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2547 				break;
2548 			case 2:
2549 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2551 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2553 				break;
2554 			case 3:
2555 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2556 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2557 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2559 				break;
2560 			case 4:
2561 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2563 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564 						 TILE_SPLIT(split_equal_to_row_size));
2565 				break;
2566 			case 5:
2567 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2568 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2570 				break;
2571 			case 6:
2572 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2574 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2576 				break;
2577 			case 7:
2578 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2579 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2580 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581 						 TILE_SPLIT(split_equal_to_row_size));
2582 				break;
2583 			case 8:
2584 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2585 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2586 				break;
2587 			case 9:
2588 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2589 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2591 				break;
2592 			case 10:
2593 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2595 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2597 				break;
2598 			case 11:
2599 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2600 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2601 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2602 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2603 				break;
2604 			case 12:
2605 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2606 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2607 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2608 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2609 				break;
2610 			case 13:
2611 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2612 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2613 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2614 				break;
2615 			case 14:
2616 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2617 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2618 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620 				break;
2621 			case 16:
2622 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2623 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2624 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2625 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2626 				break;
2627 			case 17:
2628 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2629 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2630 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2632 				break;
2633 			case 27:
2634 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2635 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2636 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2637 				break;
2638 			case 28:
2639 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2640 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2641 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2642 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643 				break;
2644 			case 29:
2645 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2646 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2647 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2648 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2649 				break;
2650 			case 30:
2651 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2652 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2653 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2654 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2655 				break;
2656 			default:
2657 				gb_tile_moden = 0;
2658 				break;
2659 			}
2660 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2661 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2662 		}
2663 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2664 			switch (reg_offset) {
2665 			case 0:
2666 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2668 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2669 						 NUM_BANKS(ADDR_SURF_16_BANK));
2670 				break;
2671 			case 1:
2672 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2673 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2674 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2675 						 NUM_BANKS(ADDR_SURF_16_BANK));
2676 				break;
2677 			case 2:
2678 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2681 						 NUM_BANKS(ADDR_SURF_16_BANK));
2682 				break;
2683 			case 3:
2684 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2685 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2686 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2687 						 NUM_BANKS(ADDR_SURF_16_BANK));
2688 				break;
2689 			case 4:
2690 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2692 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2693 						 NUM_BANKS(ADDR_SURF_8_BANK));
2694 				break;
2695 			case 5:
2696 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2698 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2699 						 NUM_BANKS(ADDR_SURF_4_BANK));
2700 				break;
2701 			case 6:
2702 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2704 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2705 						 NUM_BANKS(ADDR_SURF_2_BANK));
2706 				break;
2707 			case 8:
2708 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2710 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2711 						 NUM_BANKS(ADDR_SURF_16_BANK));
2712 				break;
2713 			case 9:
2714 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2716 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2717 						 NUM_BANKS(ADDR_SURF_16_BANK));
2718 				break;
2719 			case 10:
2720 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2722 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2723 						 NUM_BANKS(ADDR_SURF_16_BANK));
2724 				break;
2725 			case 11:
2726 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2729 						 NUM_BANKS(ADDR_SURF_16_BANK));
2730 				break;
2731 			case 12:
2732 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2734 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2735 						 NUM_BANKS(ADDR_SURF_8_BANK));
2736 				break;
2737 			case 13:
2738 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2741 						 NUM_BANKS(ADDR_SURF_4_BANK));
2742 				break;
2743 			case 14:
2744 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2746 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2747 						 NUM_BANKS(ADDR_SURF_2_BANK));
2748 				break;
2749 			default:
2750 				gb_tile_moden = 0;
2751 				break;
2752 			}
2753 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2754 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2755 		}
2756 	} else if (num_pipe_configs == 4) {
2757 		if (num_rbs == 4) {
2758 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2759 				switch (reg_offset) {
2760 				case 0:
2761 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2763 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2764 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2765 					break;
2766 				case 1:
2767 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2768 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2769 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2771 					break;
2772 				case 2:
2773 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2775 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2776 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2777 					break;
2778 				case 3:
2779 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2780 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2781 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2783 					break;
2784 				case 4:
2785 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2787 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2788 							 TILE_SPLIT(split_equal_to_row_size));
2789 					break;
2790 				case 5:
2791 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2792 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2794 					break;
2795 				case 6:
2796 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2800 					break;
2801 				case 7:
2802 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2803 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805 							 TILE_SPLIT(split_equal_to_row_size));
2806 					break;
2807 				case 8:
2808 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2809 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2810 					break;
2811 				case 9:
2812 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2813 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2814 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2815 					break;
2816 				case 10:
2817 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2818 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2819 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2820 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2821 					break;
2822 				case 11:
2823 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2824 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2825 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2826 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2827 					break;
2828 				case 12:
2829 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2830 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2831 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2832 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2833 					break;
2834 				case 13:
2835 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2836 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2837 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2838 					break;
2839 				case 14:
2840 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2841 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2842 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2843 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2844 					break;
2845 				case 16:
2846 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2847 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2848 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2849 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2850 					break;
2851 				case 17:
2852 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2853 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2854 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2855 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2856 					break;
2857 				case 27:
2858 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2859 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2860 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2861 					break;
2862 				case 28:
2863 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2864 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2865 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2866 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867 					break;
2868 				case 29:
2869 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2870 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2871 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2872 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2873 					break;
2874 				case 30:
2875 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2876 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2877 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2878 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2879 					break;
2880 				default:
2881 					gb_tile_moden = 0;
2882 					break;
2883 				}
2884 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2885 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2886 			}
2887 		} else if (num_rbs < 4) {
2888 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2889 				switch (reg_offset) {
2890 				case 0:
2891 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2893 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2894 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2895 					break;
2896 				case 1:
2897 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2899 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2900 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2901 					break;
2902 				case 2:
2903 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2905 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2906 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2907 					break;
2908 				case 3:
2909 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2911 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2912 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2913 					break;
2914 				case 4:
2915 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2917 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2918 							 TILE_SPLIT(split_equal_to_row_size));
2919 					break;
2920 				case 5:
2921 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2923 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2924 					break;
2925 				case 6:
2926 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2927 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2928 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2929 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2930 					break;
2931 				case 7:
2932 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2933 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2934 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2935 							 TILE_SPLIT(split_equal_to_row_size));
2936 					break;
2937 				case 8:
2938 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2939 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2940 					break;
2941 				case 9:
2942 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2943 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2944 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2945 					break;
2946 				case 10:
2947 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2948 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2949 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2950 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951 					break;
2952 				case 11:
2953 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2955 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2956 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2957 					break;
2958 				case 12:
2959 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2960 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2961 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2962 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2963 					break;
2964 				case 13:
2965 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2966 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2967 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2968 					break;
2969 				case 14:
2970 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2971 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2972 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2973 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2974 					break;
2975 				case 16:
2976 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2977 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2978 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2979 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2980 					break;
2981 				case 17:
2982 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2983 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2984 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2985 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2986 					break;
2987 				case 27:
2988 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2989 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2990 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2991 					break;
2992 				case 28:
2993 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2994 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2995 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2996 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997 					break;
2998 				case 29:
2999 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3000 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3001 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3002 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3003 					break;
3004 				case 30:
3005 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3006 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3007 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3008 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3009 					break;
3010 				default:
3011 					gb_tile_moden = 0;
3012 					break;
3013 				}
3014 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3015 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3016 			}
3017 		}
3018 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3019 			switch (reg_offset) {
3020 			case 0:
3021 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3023 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3024 						 NUM_BANKS(ADDR_SURF_16_BANK));
3025 				break;
3026 			case 1:
3027 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3029 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3030 						 NUM_BANKS(ADDR_SURF_16_BANK));
3031 				break;
3032 			case 2:
3033 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3035 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3036 						 NUM_BANKS(ADDR_SURF_16_BANK));
3037 				break;
3038 			case 3:
3039 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3041 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042 						 NUM_BANKS(ADDR_SURF_16_BANK));
3043 				break;
3044 			case 4:
3045 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3047 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3048 						 NUM_BANKS(ADDR_SURF_16_BANK));
3049 				break;
3050 			case 5:
3051 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3052 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3053 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3054 						 NUM_BANKS(ADDR_SURF_8_BANK));
3055 				break;
3056 			case 6:
3057 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3058 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3059 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3060 						 NUM_BANKS(ADDR_SURF_4_BANK));
3061 				break;
3062 			case 8:
3063 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3064 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3065 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3066 						 NUM_BANKS(ADDR_SURF_16_BANK));
3067 				break;
3068 			case 9:
3069 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3070 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3071 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3072 						 NUM_BANKS(ADDR_SURF_16_BANK));
3073 				break;
3074 			case 10:
3075 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3077 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3078 						 NUM_BANKS(ADDR_SURF_16_BANK));
3079 				break;
3080 			case 11:
3081 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3083 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3084 						 NUM_BANKS(ADDR_SURF_16_BANK));
3085 				break;
3086 			case 12:
3087 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3089 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3090 						 NUM_BANKS(ADDR_SURF_16_BANK));
3091 				break;
3092 			case 13:
3093 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3096 						 NUM_BANKS(ADDR_SURF_8_BANK));
3097 				break;
3098 			case 14:
3099 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3100 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3101 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3102 						 NUM_BANKS(ADDR_SURF_4_BANK));
3103 				break;
3104 			default:
3105 				gb_tile_moden = 0;
3106 				break;
3107 			}
3108 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3109 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3110 		}
3111 	} else if (num_pipe_configs == 2) {
3112 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3113 			switch (reg_offset) {
3114 			case 0:
3115 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3116 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3117 						 PIPE_CONFIG(ADDR_SURF_P2) |
3118 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3119 				break;
3120 			case 1:
3121 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3123 						 PIPE_CONFIG(ADDR_SURF_P2) |
3124 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3125 				break;
3126 			case 2:
3127 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3128 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3129 						 PIPE_CONFIG(ADDR_SURF_P2) |
3130 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3131 				break;
3132 			case 3:
3133 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3134 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3135 						 PIPE_CONFIG(ADDR_SURF_P2) |
3136 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3137 				break;
3138 			case 4:
3139 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3140 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3141 						 PIPE_CONFIG(ADDR_SURF_P2) |
3142 						 TILE_SPLIT(split_equal_to_row_size));
3143 				break;
3144 			case 5:
3145 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3146 						 PIPE_CONFIG(ADDR_SURF_P2) |
3147 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3148 				break;
3149 			case 6:
3150 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3151 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3152 						 PIPE_CONFIG(ADDR_SURF_P2) |
3153 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3154 				break;
3155 			case 7:
3156 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3157 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3158 						 PIPE_CONFIG(ADDR_SURF_P2) |
3159 						 TILE_SPLIT(split_equal_to_row_size));
3160 				break;
3161 			case 8:
3162 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3163 						PIPE_CONFIG(ADDR_SURF_P2);
3164 				break;
3165 			case 9:
3166 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3167 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3168 						 PIPE_CONFIG(ADDR_SURF_P2));
3169 				break;
3170 			case 10:
3171 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3172 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3173 						 PIPE_CONFIG(ADDR_SURF_P2) |
3174 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3175 				break;
3176 			case 11:
3177 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3178 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3179 						 PIPE_CONFIG(ADDR_SURF_P2) |
3180 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3181 				break;
3182 			case 12:
3183 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3184 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3185 						 PIPE_CONFIG(ADDR_SURF_P2) |
3186 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3187 				break;
3188 			case 13:
3189 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3190 						 PIPE_CONFIG(ADDR_SURF_P2) |
3191 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3192 				break;
3193 			case 14:
3194 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3195 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3196 						 PIPE_CONFIG(ADDR_SURF_P2) |
3197 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3198 				break;
3199 			case 16:
3200 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3201 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3202 						 PIPE_CONFIG(ADDR_SURF_P2) |
3203 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3204 				break;
3205 			case 17:
3206 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3207 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3208 						 PIPE_CONFIG(ADDR_SURF_P2) |
3209 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3210 				break;
3211 			case 27:
3212 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3213 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3214 						 PIPE_CONFIG(ADDR_SURF_P2));
3215 				break;
3216 			case 28:
3217 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3218 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3219 						 PIPE_CONFIG(ADDR_SURF_P2) |
3220 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3221 				break;
3222 			case 29:
3223 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3224 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3225 						 PIPE_CONFIG(ADDR_SURF_P2) |
3226 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3227 				break;
3228 			case 30:
3229 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3230 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3231 						 PIPE_CONFIG(ADDR_SURF_P2) |
3232 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3233 				break;
3234 			default:
3235 				gb_tile_moden = 0;
3236 				break;
3237 			}
3238 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3239 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3240 		}
3241 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3242 			switch (reg_offset) {
3243 			case 0:
3244 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3245 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3246 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3247 						 NUM_BANKS(ADDR_SURF_16_BANK));
3248 				break;
3249 			case 1:
3250 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3251 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3252 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3253 						 NUM_BANKS(ADDR_SURF_16_BANK));
3254 				break;
3255 			case 2:
3256 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3257 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3258 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3259 						 NUM_BANKS(ADDR_SURF_16_BANK));
3260 				break;
3261 			case 3:
3262 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3263 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3264 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3265 						 NUM_BANKS(ADDR_SURF_16_BANK));
3266 				break;
3267 			case 4:
3268 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3269 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3270 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3271 						 NUM_BANKS(ADDR_SURF_16_BANK));
3272 				break;
3273 			case 5:
3274 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3275 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3276 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3277 						 NUM_BANKS(ADDR_SURF_16_BANK));
3278 				break;
3279 			case 6:
3280 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3281 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3282 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3283 						 NUM_BANKS(ADDR_SURF_8_BANK));
3284 				break;
3285 			case 8:
3286 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3287 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3288 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3289 						 NUM_BANKS(ADDR_SURF_16_BANK));
3290 				break;
3291 			case 9:
3292 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3293 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3294 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3295 						 NUM_BANKS(ADDR_SURF_16_BANK));
3296 				break;
3297 			case 10:
3298 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3299 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3300 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3301 						 NUM_BANKS(ADDR_SURF_16_BANK));
3302 				break;
3303 			case 11:
3304 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3305 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3306 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 						 NUM_BANKS(ADDR_SURF_16_BANK));
3308 				break;
3309 			case 12:
3310 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3311 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3312 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3313 						 NUM_BANKS(ADDR_SURF_16_BANK));
3314 				break;
3315 			case 13:
3316 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3318 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319 						 NUM_BANKS(ADDR_SURF_16_BANK));
3320 				break;
3321 			case 14:
3322 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3325 						 NUM_BANKS(ADDR_SURF_8_BANK));
3326 				break;
3327 			default:
3328 				gb_tile_moden = 0;
3329 				break;
3330 			}
3331 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3332 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3333 		}
3334 	} else
3335 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3336 }
3337 
3338 /**
3339  * cik_select_se_sh - select which SE, SH to address
3340  *
3341  * @rdev: radeon_device pointer
3342  * @se_num: shader engine to address
3343  * @sh_num: sh block to address
3344  *
3345  * Select which SE, SH combinations to address. Certain
3346  * registers are instanced per SE or SH.  0xffffffff means
3347  * broadcast to all SEs or SHs (CIK).
3348  */
3349 static void cik_select_se_sh(struct radeon_device *rdev,
3350 			     u32 se_num, u32 sh_num)
3351 {
3352 	u32 data = INSTANCE_BROADCAST_WRITES;
3353 
3354 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3355 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3356 	else if (se_num == 0xffffffff)
3357 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3358 	else if (sh_num == 0xffffffff)
3359 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3360 	else
3361 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3362 	WREG32(GRBM_GFX_INDEX, data);
3363 }
3364 
3365 /**
3366  * cik_create_bitmask - create a bitmask
3367  *
3368  * @bit_width: length of the mask
3369  *
3370  * create a variable length bit mask (CIK).
3371  * Returns the bitmask.
3372  */
3373 static u32 cik_create_bitmask(u32 bit_width)
3374 {
3375 	u32 i, mask = 0;
3376 
3377 	for (i = 0; i < bit_width; i++) {
3378 		mask <<= 1;
3379 		mask |= 1;
3380 	}
3381 	return mask;
3382 }
3383 
3384 /**
3385  * cik_get_rb_disabled - computes the mask of disabled RBs
3386  *
3387  * @rdev: radeon_device pointer
3388  * @max_rb_num: max RBs (render backends) for the asic
3389  * @se_num: number of SEs (shader engines) for the asic
3390  * @sh_per_se: number of SH blocks per SE for the asic
3391  *
3392  * Calculates the bitmask of disabled RBs (CIK).
3393  * Returns the disabled RB bitmask.
3394  */
3395 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3396 			      u32 max_rb_num_per_se,
3397 			      u32 sh_per_se)
3398 {
3399 	u32 data, mask;
3400 
3401 	data = RREG32(CC_RB_BACKEND_DISABLE);
3402 	if (data & 1)
3403 		data &= BACKEND_DISABLE_MASK;
3404 	else
3405 		data = 0;
3406 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3407 
3408 	data >>= BACKEND_DISABLE_SHIFT;
3409 
3410 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3411 
3412 	return data & mask;
3413 }
3414 
3415 /**
3416  * cik_setup_rb - setup the RBs on the asic
3417  *
3418  * @rdev: radeon_device pointer
3419  * @se_num: number of SEs (shader engines) for the asic
3420  * @sh_per_se: number of SH blocks per SE for the asic
3421  * @max_rb_num: max RBs (render backends) for the asic
3422  *
3423  * Configures per-SE/SH RB registers (CIK).
3424  */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Walk every SE/SH instance and collect its disabled-RB bits into
	 * one packed mask.  Hawaii uses a wider per-SH bitmap field than
	 * the other CIK parts.
	 */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast addressing after the per-instance reads */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert the packed mask: every RB not marked disabled is enabled. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Program PA_SC_RASTER_CONFIG per SE.  Each SH consumes two bits
	 * of enabled_rbs, selecting one of the RB_MAP encodings below.
	 */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* no RB of this pair enabled */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				/* only the low RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the high RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* leave the GRBM index in broadcast mode for subsequent writes */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3483 
3484 /**
3485  * cik_gpu_init - setup the 3D engine
3486  *
3487  * @rdev: radeon_device pointer
3488  *
3489  * Configures the 3D engine and tiling configuration
3490  * registers so that the 3D engine is usable.
3491  */
3492 static void cik_gpu_init(struct radeon_device *rdev)
3493 {
3494 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3495 	u32 mc_shared_chmap, mc_arb_ramcfg;
3496 	u32 hdp_host_path_cntl;
3497 	u32 tmp;
3498 	int i, j;
3499 
3500 	switch (rdev->family) {
3501 	case CHIP_BONAIRE:
3502 		rdev->config.cik.max_shader_engines = 2;
3503 		rdev->config.cik.max_tile_pipes = 4;
3504 		rdev->config.cik.max_cu_per_sh = 7;
3505 		rdev->config.cik.max_sh_per_se = 1;
3506 		rdev->config.cik.max_backends_per_se = 2;
3507 		rdev->config.cik.max_texture_channel_caches = 4;
3508 		rdev->config.cik.max_gprs = 256;
3509 		rdev->config.cik.max_gs_threads = 32;
3510 		rdev->config.cik.max_hw_contexts = 8;
3511 
3512 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3513 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3514 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3515 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3516 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3517 		break;
3518 	case CHIP_HAWAII:
3519 		rdev->config.cik.max_shader_engines = 4;
3520 		rdev->config.cik.max_tile_pipes = 16;
3521 		rdev->config.cik.max_cu_per_sh = 11;
3522 		rdev->config.cik.max_sh_per_se = 1;
3523 		rdev->config.cik.max_backends_per_se = 4;
3524 		rdev->config.cik.max_texture_channel_caches = 16;
3525 		rdev->config.cik.max_gprs = 256;
3526 		rdev->config.cik.max_gs_threads = 32;
3527 		rdev->config.cik.max_hw_contexts = 8;
3528 
3529 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3530 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3531 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3532 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3533 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3534 		break;
3535 	case CHIP_KAVERI:
3536 		rdev->config.cik.max_shader_engines = 1;
3537 		rdev->config.cik.max_tile_pipes = 4;
3538 		if ((rdev->pdev->device == 0x1304) ||
3539 		    (rdev->pdev->device == 0x1305) ||
3540 		    (rdev->pdev->device == 0x130C) ||
3541 		    (rdev->pdev->device == 0x130F) ||
3542 		    (rdev->pdev->device == 0x1310) ||
3543 		    (rdev->pdev->device == 0x1311) ||
3544 		    (rdev->pdev->device == 0x131C)) {
3545 			rdev->config.cik.max_cu_per_sh = 8;
3546 			rdev->config.cik.max_backends_per_se = 2;
3547 		} else if ((rdev->pdev->device == 0x1309) ||
3548 			   (rdev->pdev->device == 0x130A) ||
3549 			   (rdev->pdev->device == 0x130D) ||
3550 			   (rdev->pdev->device == 0x1313) ||
3551 			   (rdev->pdev->device == 0x131D)) {
3552 			rdev->config.cik.max_cu_per_sh = 6;
3553 			rdev->config.cik.max_backends_per_se = 2;
3554 		} else if ((rdev->pdev->device == 0x1306) ||
3555 			   (rdev->pdev->device == 0x1307) ||
3556 			   (rdev->pdev->device == 0x130B) ||
3557 			   (rdev->pdev->device == 0x130E) ||
3558 			   (rdev->pdev->device == 0x1315) ||
3559 			   (rdev->pdev->device == 0x1318) ||
3560 			   (rdev->pdev->device == 0x131B)) {
3561 			rdev->config.cik.max_cu_per_sh = 4;
3562 			rdev->config.cik.max_backends_per_se = 1;
3563 		} else {
3564 			rdev->config.cik.max_cu_per_sh = 3;
3565 			rdev->config.cik.max_backends_per_se = 1;
3566 		}
3567 		rdev->config.cik.max_sh_per_se = 1;
3568 		rdev->config.cik.max_texture_channel_caches = 4;
3569 		rdev->config.cik.max_gprs = 256;
3570 		rdev->config.cik.max_gs_threads = 16;
3571 		rdev->config.cik.max_hw_contexts = 8;
3572 
3573 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3574 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3575 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3576 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3577 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3578 		break;
3579 	case CHIP_KABINI:
3580 	case CHIP_MULLINS:
3581 	default:
3582 		rdev->config.cik.max_shader_engines = 1;
3583 		rdev->config.cik.max_tile_pipes = 2;
3584 		rdev->config.cik.max_cu_per_sh = 2;
3585 		rdev->config.cik.max_sh_per_se = 1;
3586 		rdev->config.cik.max_backends_per_se = 1;
3587 		rdev->config.cik.max_texture_channel_caches = 2;
3588 		rdev->config.cik.max_gprs = 256;
3589 		rdev->config.cik.max_gs_threads = 16;
3590 		rdev->config.cik.max_hw_contexts = 8;
3591 
3592 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3593 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3594 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3595 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3596 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3597 		break;
3598 	}
3599 
3600 	/* Initialize HDP */
3601 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3602 		WREG32((0x2c14 + j), 0x00000000);
3603 		WREG32((0x2c18 + j), 0x00000000);
3604 		WREG32((0x2c1c + j), 0x00000000);
3605 		WREG32((0x2c20 + j), 0x00000000);
3606 		WREG32((0x2c24 + j), 0x00000000);
3607 	}
3608 
3609 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3610 
3611 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3612 
3613 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3614 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3615 
3616 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3617 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3618 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3619 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3620 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3621 		rdev->config.cik.mem_row_size_in_kb = 4;
3622 	/* XXX use MC settings? */
3623 	rdev->config.cik.shader_engine_tile_size = 32;
3624 	rdev->config.cik.num_gpus = 1;
3625 	rdev->config.cik.multi_gpu_tile_size = 64;
3626 
3627 	/* fix up row size */
3628 	gb_addr_config &= ~ROW_SIZE_MASK;
3629 	switch (rdev->config.cik.mem_row_size_in_kb) {
3630 	case 1:
3631 	default:
3632 		gb_addr_config |= ROW_SIZE(0);
3633 		break;
3634 	case 2:
3635 		gb_addr_config |= ROW_SIZE(1);
3636 		break;
3637 	case 4:
3638 		gb_addr_config |= ROW_SIZE(2);
3639 		break;
3640 	}
3641 
3642 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3643 	 * not have bank info, so create a custom tiling dword.
3644 	 * bits 3:0   num_pipes
3645 	 * bits 7:4   num_banks
3646 	 * bits 11:8  group_size
3647 	 * bits 15:12 row_size
3648 	 */
3649 	rdev->config.cik.tile_config = 0;
3650 	switch (rdev->config.cik.num_tile_pipes) {
3651 	case 1:
3652 		rdev->config.cik.tile_config |= (0 << 0);
3653 		break;
3654 	case 2:
3655 		rdev->config.cik.tile_config |= (1 << 0);
3656 		break;
3657 	case 4:
3658 		rdev->config.cik.tile_config |= (2 << 0);
3659 		break;
3660 	case 8:
3661 	default:
3662 		/* XXX what about 12? */
3663 		rdev->config.cik.tile_config |= (3 << 0);
3664 		break;
3665 	}
3666 	rdev->config.cik.tile_config |=
3667 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3668 	rdev->config.cik.tile_config |=
3669 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3670 	rdev->config.cik.tile_config |=
3671 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3672 
3673 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3674 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3675 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3676 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3677 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3678 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3679 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3680 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3681 
3682 	cik_tiling_mode_table_init(rdev);
3683 
3684 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3685 		     rdev->config.cik.max_sh_per_se,
3686 		     rdev->config.cik.max_backends_per_se);
3687 
3688 	rdev->config.cik.active_cus = 0;
3689 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3690 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3691 			rdev->config.cik.active_cus +=
3692 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3693 		}
3694 	}
3695 
3696 	/* set HW defaults for 3D engine */
3697 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3698 
3699 	WREG32(SX_DEBUG_1, 0x20);
3700 
3701 	WREG32(TA_CNTL_AUX, 0x00010000);
3702 
3703 	tmp = RREG32(SPI_CONFIG_CNTL);
3704 	tmp |= 0x03000000;
3705 	WREG32(SPI_CONFIG_CNTL, tmp);
3706 
3707 	WREG32(SQ_CONFIG, 1);
3708 
3709 	WREG32(DB_DEBUG, 0);
3710 
3711 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3712 	tmp |= 0x00000400;
3713 	WREG32(DB_DEBUG2, tmp);
3714 
3715 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3716 	tmp |= 0x00020200;
3717 	WREG32(DB_DEBUG3, tmp);
3718 
3719 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3720 	tmp |= 0x00018208;
3721 	WREG32(CB_HW_CONTROL, tmp);
3722 
3723 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3724 
3725 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3726 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3727 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3728 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3729 
3730 	WREG32(VGT_NUM_INSTANCES, 1);
3731 
3732 	WREG32(CP_PERFMON_CNTL, 0);
3733 
3734 	WREG32(SQ_CONFIG, 0);
3735 
3736 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3737 					  FORCE_EOV_MAX_REZ_CNT(255)));
3738 
3739 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3740 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3741 
3742 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3743 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3744 
3745 	tmp = RREG32(HDP_MISC_CNTL);
3746 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3747 	WREG32(HDP_MISC_CNTL, tmp);
3748 
3749 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3750 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3751 
3752 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3753 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3754 
3755 	udelay(50);
3756 }
3757 
3758 /*
3759  * GPU scratch registers helpers function.
3760  */
3761 /**
3762  * cik_scratch_init - setup driver info for CP scratch regs
3763  *
3764  * @rdev: radeon_device pointer
3765  *
3766  * Set up the number and offset of the CP scratch registers.
3767  * NOTE: use of CP scratch registers is a legacy inferface and
3768  * is not used by default on newer asics (r6xx+).  On newer asics,
3769  * memory buffers are used for fences rather than scratch regs.
3770  */
3771 static void cik_scratch_init(struct radeon_device *rdev)
3772 {
3773 	int i;
3774 
3775 	rdev->scratch.num_reg = 7;
3776 	rdev->scratch.reg_base = SCRATCH_REG0;
3777 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3778 		rdev->scratch.free[i] = true;
3779 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3780 	}
3781 }
3782 
3783 /**
3784  * cik_ring_test - basic gfx ring test
3785  *
3786  * @rdev: radeon_device pointer
3787  * @ring: radeon_ring structure holding ring information
3788  *
3789  * Allocate a scratch register and write to it using the gfx ring (CIK).
3790  * Provides a basic gfx ring test to verify that the ring is working.
3791  * Used by cik_cp_gfx_resume();
3792  * Returns 0 on success, error on failure.
3793  */
3794 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3795 {
3796 	uint32_t scratch;
3797 	uint32_t tmp = 0;
3798 	unsigned i;
3799 	int r;
3800 
3801 	r = radeon_scratch_get(rdev, &scratch);
3802 	if (r) {
3803 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3804 		return r;
3805 	}
3806 	WREG32(scratch, 0xCAFEDEAD);
3807 	r = radeon_ring_lock(rdev, ring, 3);
3808 	if (r) {
3809 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3810 		radeon_scratch_free(rdev, scratch);
3811 		return r;
3812 	}
3813 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3814 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3815 	radeon_ring_write(ring, 0xDEADBEEF);
3816 	radeon_ring_unlock_commit(rdev, ring, false);
3817 
3818 	for (i = 0; i < rdev->usec_timeout; i++) {
3819 		tmp = RREG32(scratch);
3820 		if (tmp == 0xDEADBEEF)
3821 			break;
3822 		DRM_UDELAY(1);
3823 	}
3824 	if (i < rdev->usec_timeout) {
3825 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3826 	} else {
3827 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3828 			  ring->idx, scratch, tmp);
3829 		r = -EINVAL;
3830 	}
3831 	radeon_scratch_free(rdev, scratch);
3832 	return r;
3833 }
3834 
3835 /**
3836  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3837  *
3838  * @rdev: radeon_device pointer
3839  * @ridx: radeon ring index
3840  *
3841  * Emits an hdp flush on the cp.
3842  */
3843 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3844 				       int ridx)
3845 {
3846 	struct radeon_ring *ring = &rdev->ring[ridx];
3847 	u32 ref_and_mask;
3848 
3849 	switch (ring->idx) {
3850 	case CAYMAN_RING_TYPE_CP1_INDEX:
3851 	case CAYMAN_RING_TYPE_CP2_INDEX:
3852 	default:
3853 		switch (ring->me) {
3854 		case 0:
3855 			ref_and_mask = CP2 << ring->pipe;
3856 			break;
3857 		case 1:
3858 			ref_and_mask = CP6 << ring->pipe;
3859 			break;
3860 		default:
3861 			return;
3862 		}
3863 		break;
3864 	case RADEON_RING_TYPE_GFX_INDEX:
3865 		ref_and_mask = CP0;
3866 		break;
3867 	}
3868 
3869 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3870 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3871 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3872 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3873 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3874 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3875 	radeon_ring_write(ring, ref_and_mask);
3876 	radeon_ring_write(ring, ref_and_mask);
3877 	radeon_ring_write(ring, 0x20); /* poll interval */
3878 }
3879 
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address the sequence number gets written to */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);	/* low addr bits, dword aligned */
	/* high 16 addr bits combined with the data/interrupt selects */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);	/* sequence value to store */
	radeon_ring_write(ring, 0);
}
3906 
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address the sequence number gets written to */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* note: unlike the gfx EOP packet, the selects come before the address here */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);	/* low addr bits, dword aligned */
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);	/* sequence value to store */
	radeon_ring_write(ring, 0);
}
3934 
3935 /**
3936  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3937  *
3938  * @rdev: radeon_device pointer
3939  * @ring: radeon ring buffer object
3940  * @semaphore: radeon semaphore object
3941  * @emit_wait: Is this a sempahore wait?
3942  *
3943  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3944  * from running ahead of semaphore waits.
3945  */
3946 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3947 			     struct radeon_ring *ring,
3948 			     struct radeon_semaphore *semaphore,
3949 			     bool emit_wait)
3950 {
3951 	uint64_t addr = semaphore->gpu_addr;
3952 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3953 
3954 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3955 	radeon_ring_write(ring, lower_32_bits(addr));
3956 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3957 
3958 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3959 		/* Prevent the PFP from running ahead of the semaphore wait */
3960 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3961 		radeon_ring_write(ring, 0x0);
3962 	}
3963 
3964 	return true;
3965 }
3966 
/**
 * cik_copy_cpdma - copy pages using the CP DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @resv: reservation object to sync to
 *
 * Copy GPU paging using the CP DMA engine (CIK+).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 *
 * Returns the fence covering the copy, or an ERR_PTR on failure.
 */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_semaphore *sem = NULL;
	struct radeon_fence *fence;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	r = radeon_semaphore_create(rdev, &sem);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return ERR_PTR(r);
	}

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	/* each DMA_DATA packet moves at most 0x1fffff bytes */
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per DMA_DATA packet plus fixed sync/fence overhead */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_semaphore_free(rdev, &sem, NULL);
		return ERR_PTR(r);
	}

	/* wait for any prior users of the buffers before copying */
	radeon_semaphore_sync_resv(rdev, sem, resv, false);
	radeon_semaphore_sync_rings(rdev, sem, ring->idx);

	for (i = 0; i < num_loops; i++) {
		/* clamp this chunk to the per-packet maximum */
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* only the last packet needs CP synchronization */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		/* drop everything queued above; nothing was committed yet */
		radeon_ring_unlock_undo(rdev, ring);
		radeon_semaphore_free(rdev, &sem, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	/* the semaphore is released once the copy's fence signals */
	radeon_semaphore_free(rdev, &sem, fence);

	return fence;
}
4042 
4043 /*
4044  * IB stuff
4045  */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits an DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this packet + 4 for the IB packet below */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this packet + 4 for the IB packet below */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords plus the VM id it executes under (0 if none) */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
4101 
4102 /**
4103  * cik_ib_test - basic gfx ring IB test
4104  *
4105  * @rdev: radeon_device pointer
4106  * @ring: radeon_ring structure holding ring information
4107  *
4108  * Allocate an IB and execute it on the gfx ring (CIK).
4109  * Provides a basic gfx ring test to verify that IBs are working.
4110  * Returns 0 on success, error on failure.
4111  */
4112 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4113 {
4114 	struct radeon_ib ib;
4115 	uint32_t scratch;
4116 	uint32_t tmp = 0;
4117 	unsigned i;
4118 	int r;
4119 
4120 	r = radeon_scratch_get(rdev, &scratch);
4121 	if (r) {
4122 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4123 		return r;
4124 	}
4125 	WREG32(scratch, 0xCAFEDEAD);
4126 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4127 	if (r) {
4128 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4129 		radeon_scratch_free(rdev, scratch);
4130 		return r;
4131 	}
4132 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4133 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4134 	ib.ptr[2] = 0xDEADBEEF;
4135 	ib.length_dw = 3;
4136 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4137 	if (r) {
4138 		radeon_scratch_free(rdev, scratch);
4139 		radeon_ib_free(rdev, &ib);
4140 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4141 		return r;
4142 	}
4143 	r = radeon_fence_wait(ib.fence, false);
4144 	if (r) {
4145 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4146 		radeon_scratch_free(rdev, scratch);
4147 		radeon_ib_free(rdev, &ib);
4148 		return r;
4149 	}
4150 	for (i = 0; i < rdev->usec_timeout; i++) {
4151 		tmp = RREG32(scratch);
4152 		if (tmp == 0xDEADBEEF)
4153 			break;
4154 		DRM_UDELAY(1);
4155 	}
4156 	if (i < rdev->usec_timeout) {
4157 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4158 	} else {
4159 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4160 			  scratch, tmp);
4161 		r = -EINVAL;
4162 	}
4163 	radeon_scratch_free(rdev, scratch);
4164 	radeon_ib_free(rdev, &ib);
4165 	return r;
4166 }
4167 
4168 /*
4169  * CP.
 * On CIK, gfx and compute now have independent command processors.
4171  *
4172  * GFX
4173  * Gfx consists of a single ring and can process both gfx jobs and
4174  * compute jobs.  The gfx CP consists of three microengines (ME):
4175  * PFP - Pre-Fetch Parser
4176  * ME - Micro Engine
4177  * CE - Constant Engine
4178  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
4180  * used by the DE so that they can be loaded into cache in parallel
4181  * while the DE is processing state update packets.
4182  *
4183  * Compute
4184  * The compute CP consists of two microengines (ME):
4185  * MEC1 - Compute MicroEngine 1
4186  * MEC2 - Compute MicroEngine 2
4187  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4188  * The queues are exposed to userspace and are programmed directly
4189  * by the compute runtime.
4190  */
4191 /**
4192  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4193  *
4194  * @rdev: radeon_device pointer
4195  * @enable: enable or disable the MEs
4196  *
4197  * Halts or unhalts the gfx MEs.
4198  */
4199 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4200 {
4201 	if (enable)
4202 		WREG32(CP_ME_CNTL, 0);
4203 	else {
4204 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4205 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4206 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4207 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4208 	}
4209 	udelay(50);
4210 }
4211 
4212 /**
4213  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4214  *
4215  * @rdev: radeon_device pointer
4216  *
4217  * Loads the gfx PFP, ME, and CE ucode.
4218  * Returns 0 for success, -EINVAL if the ucode is not available.
4219  */
4220 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4221 {
4222 	int i;
4223 
4224 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4225 		return -EINVAL;
4226 
4227 	cik_cp_gfx_enable(rdev, false);
4228 
4229 	if (rdev->new_fw) {
4230 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4231 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4232 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4233 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4234 		const struct gfx_firmware_header_v1_0 *me_hdr =
4235 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4236 		const __le32 *fw_data;
4237 		u32 fw_size;
4238 
4239 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4240 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4241 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4242 
4243 		/* PFP */
4244 		fw_data = (const __le32 *)
4245 			((const char *)rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4246 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4247 		WREG32(CP_PFP_UCODE_ADDR, 0);
4248 		for (i = 0; i < fw_size; i++)
4249 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4250 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4251 
4252 		/* CE */
4253 		fw_data = (const __le32 *)
4254 			((const char *)rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4255 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4256 		WREG32(CP_CE_UCODE_ADDR, 0);
4257 		for (i = 0; i < fw_size; i++)
4258 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4259 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4260 
4261 		/* ME */
4262 		fw_data = (const __be32 *)
4263 			((const char *)rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4264 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4265 		WREG32(CP_ME_RAM_WADDR, 0);
4266 		for (i = 0; i < fw_size; i++)
4267 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4268 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4269 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4270 	} else {
4271 		const __be32 *fw_data;
4272 
4273 		/* PFP */
4274 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4275 		WREG32(CP_PFP_UCODE_ADDR, 0);
4276 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4277 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4278 		WREG32(CP_PFP_UCODE_ADDR, 0);
4279 
4280 		/* CE */
4281 		fw_data = (const __be32 *)rdev->ce_fw->data;
4282 		WREG32(CP_CE_UCODE_ADDR, 0);
4283 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4284 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4285 		WREG32(CP_CE_UCODE_ADDR, 0);
4286 
4287 		/* ME */
4288 		fw_data = (const __be32 *)rdev->me_fw->data;
4289 		WREG32(CP_ME_RAM_WADDR, 0);
4290 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4291 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4292 		WREG32(CP_ME_RAM_WADDR, 0);
4293 	}
4294 
4295 	return 0;
4296 }
4297 
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* clear-state dwords plus the fixed packets emitted below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the golden register state from clearstate_ci.h */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4358 
/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the MEs first so the ring is no longer consumed */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
4372 
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size (log2 of the size in quadwords) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* final CNTL write drops RB_RPTR_WR_ENA again */
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4448 
4449 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4450 		     struct radeon_ring *ring)
4451 {
4452 	u32 rptr;
4453 
4454 	if (rdev->wb.enabled)
4455 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4456 	else
4457 		rptr = RREG32(CP_RB0_RPTR);
4458 
4459 	return rptr;
4460 }
4461 
4462 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4463 		     struct radeon_ring *ring)
4464 {
4465 	u32 wptr;
4466 
4467 	wptr = RREG32(CP_RB0_WPTR);
4468 
4469 	return wptr;
4470 }
4471 
/* Commit the driver's gfx ring write pointer to the hardware. */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back — presumably to flush the posted write; confirm */
	(void)RREG32(CP_RB0_WPTR);
}
4478 
/* Return a compute ring's read pointer: from the writeback buffer when
 * enabled, otherwise from the queue's HQD register (which requires
 * selecting the queue via SRBM first).
 */
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		/* serialize SRBM access: select this ring's me/pipe/queue,
		 * read the HQD rptr, then restore the default selection */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4496 
/* Return a compute ring's write pointer: from the writeback buffer when
 * enabled, otherwise from the queue's HQD register via SRBM select.
 */
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		/* serialize SRBM access: select this ring's me/pipe/queue,
		 * read the HQD wptr, then restore the default selection */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4515 
/* Publish a compute ring's write pointer: mirror it into the writeback
 * buffer and ring the queue's doorbell to notify the MEC.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4523 
4524 /**
4525  * cik_cp_compute_enable - enable/disable the compute CP MEs
4526  *
4527  * @rdev: radeon_device pointer
4528  * @enable: enable or disable the MEs
4529  *
4530  * Halts or unhalts the compute MEs.
4531  */
4532 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4533 {
4534 	if (enable)
4535 		WREG32(CP_MEC_CNTL, 0);
4536 	else {
4537 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4538 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4539 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4540 	}
4541 	udelay(50);
4542 }
4543 
4544 /**
4545  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4546  *
4547  * @rdev: radeon_device pointer
4548  *
4549  * Loads the compute MEC1&2 ucode.
4550  * Returns 0 for success, -EINVAL if the ucode is not available.
4551  */
4552 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4553 {
4554 	int i;
4555 
4556 	if (!rdev->mec_fw)
4557 		return -EINVAL;
4558 
4559 	cik_cp_compute_enable(rdev, false);
4560 
4561 	if (rdev->new_fw) {
4562 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4563 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4564 		const __le32 *fw_data;
4565 		u32 fw_size;
4566 
4567 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4568 
4569 		/* MEC1 */
4570 		fw_data = (const __le32 *)
4571 			((const char *)rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4572 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4573 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4574 		for (i = 0; i < fw_size; i++)
4575 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4576 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4577 
4578 		/* MEC2 */
4579 		if (rdev->family == CHIP_KAVERI) {
4580 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4581 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4582 
4583 			fw_data = (const __le32 *)
4584 				((const char *)rdev->mec2_fw->data +
4585 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4586 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4587 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4588 			for (i = 0; i < fw_size; i++)
4589 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4590 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4591 		}
4592 	} else {
4593 		const __be32 *fw_data;
4594 
4595 		/* MEC1 */
4596 		fw_data = (const __be32 *)rdev->mec_fw->data;
4597 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4598 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4599 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4600 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4601 
4602 		if (rdev->family == CHIP_KAVERI) {
4603 			/* MEC2 */
4604 			fw_data = (const __be32 *)rdev->mec_fw->data;
4605 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4606 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4607 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4608 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4609 		}
4610 	}
4611 
4612 	return 0;
4613 }
4614 
4615 /**
4616  * cik_cp_compute_start - start the compute queues
4617  *
4618  * @rdev: radeon_device pointer
4619  *
4620  * Enable the compute queues.
4621  * Returns 0 for success, error for failure.
4622  */
4623 static int cik_cp_compute_start(struct radeon_device *rdev)
4624 {
4625 	cik_cp_compute_enable(rdev, true);
4626 
4627 	return 0;
4628 }
4629 
4630 /**
4631  * cik_cp_compute_fini - stop the compute queues
4632  *
4633  * @rdev: radeon_device pointer
4634  *
4635  * Stop the compute queues and tear down the driver queue
4636  * info.
4637  */
4638 static void cik_cp_compute_fini(struct radeon_device *rdev)
4639 {
4640 	int i, idx, r;
4641 
4642 	cik_cp_compute_enable(rdev, false);
4643 
4644 	for (i = 0; i < 2; i++) {
4645 		if (i == 0)
4646 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4647 		else
4648 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4649 
4650 		if (rdev->ring[idx].mqd_obj) {
4651 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4652 			if (unlikely(r != 0))
4653 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4654 
4655 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4656 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4657 
4658 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4659 			rdev->ring[idx].mqd_obj = NULL;
4660 		}
4661 	}
4662 }
4663 
4664 static void cik_mec_fini(struct radeon_device *rdev)
4665 {
4666 	int r;
4667 
4668 	if (rdev->mec.hpd_eop_obj) {
4669 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4670 		if (unlikely(r != 0))
4671 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4672 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4673 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4674 
4675 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4676 		rdev->mec.hpd_eop_obj = NULL;
4677 	}
4678 }
4679 
4680 #define MEC_HPD_SIZE 2048
4681 
4682 static int cik_mec_init(struct radeon_device *rdev)
4683 {
4684 	int r;
4685 	u32 *hpd;
4686 
4687 	/*
4688 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4689 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4690 	 */
4691 	if (rdev->family == CHIP_KAVERI)
4692 		rdev->mec.num_mec = 2;
4693 	else
4694 		rdev->mec.num_mec = 1;
4695 	rdev->mec.num_pipe = 4;
4696 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4697 
4698 	if (rdev->mec.hpd_eop_obj == NULL) {
4699 		r = radeon_bo_create(rdev,
4700 				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4701 				     PAGE_SIZE, true,
4702 				     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4703 				     &rdev->mec.hpd_eop_obj);
4704 		if (r) {
4705 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4706 			return r;
4707 		}
4708 	}
4709 
4710 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4711 	if (unlikely(r != 0)) {
4712 		cik_mec_fini(rdev);
4713 		return r;
4714 	}
4715 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4716 			  &rdev->mec.hpd_eop_gpu_addr);
4717 	if (r) {
4718 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4719 		cik_mec_fini(rdev);
4720 		return r;
4721 	}
4722 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4723 	if (r) {
4724 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4725 		cik_mec_fini(rdev);
4726 		return r;
4727 	}
4728 
4729 	/* clear memory.  Not sure if this is required or not */
4730 	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4731 
4732 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4733 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4734 
4735 	return 0;
4736 }
4737 
/*
 * CPU-side shadow of the per-queue CP hardware queue descriptor (HQD)
 * registers.  cik_cp_compute_resume() fills these fields in as it
 * programs the corresponding CP_MQD_ and CP_HQD_ registers, so the copy
 * embedded in the MQD matches what was written to the hardware.
 * NOTE(review): field order appears to be hardware-defined - do not reorder.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;			/* MQD GPU address, low bits */
	u32 cp_mqd_base_addr_hi;		/* MQD GPU address, upper 32 bits */
	u32 cp_hqd_active;			/* set to 1 when the queue is activated */
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;			/* ring base, similar to CP_RB0_BASE */
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;		/* rptr writeback address */
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;		/* wptr polling address (if polling enabled) */
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4776 
/*
 * Memory queue descriptor (MQD) for Bonaire-class compute queues.
 * One MQD is allocated per compute ring by cik_cp_compute_resume(),
 * which points the CP at it via CP_MQD_BASE_ADDR.
 * NOTE(review): layout appears to be consumed by the CP microcode -
 * do not reorder fields.
 */
struct bonaire_mqd
{
	u32 header;				/* set to 0xC0310800 by cik_cp_compute_resume() */
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];		/* CU enable masks, set to all-ones */
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];		/* CU enable masks, set to all-ones */
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;	/* shadow of the programmed HQD regs */
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4804 
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
		/* pipes 0-3 are on MEC1, 4-7 on MEC2 (4 pipes per MEC) */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* each pipe gets a double-sized slot in the buffer from cik_mec_init() */
		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);

		cik_srbm_select(rdev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

		/* set the VMID assigned */
		WREG32(CP_HPD_EOP_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(CP_HPD_EOP_CONTROL);
		tmp &= ~EOP_SIZE_MASK;
		tmp |= order_base_2(MEC_HPD_SIZE / 8);
		WREG32(CP_HPD_EOP_CONTROL, tmp);
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* select this ring's me/pipe/queue before touching CP_HQD_* regs */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active; wait up to usec_timeout for
		 * the dequeue to complete before reprogramming the queue state */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address wether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* mark the ring ready, then confirm with a ring test */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
5049 
5050 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5051 {
5052 	cik_cp_gfx_enable(rdev, enable);
5053 	cik_cp_compute_enable(rdev, enable);
5054 }
5055 
/*
 * Load the gfx CP ucode, then the compute CP ucode.
 * Returns 0 on success or the first error encountered.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r == 0)
		r = cik_cp_compute_load_microcode(rdev);
	return r;
}
5069 
/* Tear down both command processors (gfx then compute). */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
5075 
5076 static int cik_cp_resume(struct radeon_device *rdev)
5077 {
5078 	int r;
5079 
5080 	cik_enable_gui_idle_interrupt(rdev, false);
5081 
5082 	r = cik_cp_load_microcode(rdev);
5083 	if (r)
5084 		return r;
5085 
5086 	r = cik_cp_gfx_resume(rdev);
5087 	if (r)
5088 		return r;
5089 	r = cik_cp_compute_resume(rdev);
5090 	if (r)
5091 		return r;
5092 
5093 	cik_enable_gui_idle_interrupt(rdev, true);
5094 
5095 	return 0;
5096 }
5097 
/*
 * Dump the GRBM/SRBM, SDMA and CP status registers to the kernel log.
 * Debug aid used by the reset paths; read order and message text are
 * kept stable so logs stay comparable across kernels.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
5137 
5138 /**
5139  * cik_gpu_check_soft_reset - check which blocks are busy
5140  *
5141  * @rdev: radeon_device pointer
5142  *
5143  * Check which blocks are busy and return the relevant reset
5144  * mask to be used by cik_gpu_soft_reset().
5145  * Returns a mask of the blocks to be reset.
5146  */
5147 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5148 {
5149 	u32 reset_mask = 0;
5150 	u32 tmp;
5151 
5152 	/* GRBM_STATUS */
5153 	tmp = RREG32(GRBM_STATUS);
5154 	if (tmp & (PA_BUSY | SC_BUSY |
5155 		   BCI_BUSY | SX_BUSY |
5156 		   TA_BUSY | VGT_BUSY |
5157 		   DB_BUSY | CB_BUSY |
5158 		   GDS_BUSY | SPI_BUSY |
5159 		   IA_BUSY | IA_BUSY_NO_DMA))
5160 		reset_mask |= RADEON_RESET_GFX;
5161 
5162 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5163 		reset_mask |= RADEON_RESET_CP;
5164 
5165 	/* GRBM_STATUS2 */
5166 	tmp = RREG32(GRBM_STATUS2);
5167 	if (tmp & RLC_BUSY)
5168 		reset_mask |= RADEON_RESET_RLC;
5169 
5170 	/* SDMA0_STATUS_REG */
5171 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5172 	if (!(tmp & SDMA_IDLE))
5173 		reset_mask |= RADEON_RESET_DMA;
5174 
5175 	/* SDMA1_STATUS_REG */
5176 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5177 	if (!(tmp & SDMA_IDLE))
5178 		reset_mask |= RADEON_RESET_DMA1;
5179 
5180 	/* SRBM_STATUS2 */
5181 	tmp = RREG32(SRBM_STATUS2);
5182 	if (tmp & SDMA_BUSY)
5183 		reset_mask |= RADEON_RESET_DMA;
5184 
5185 	if (tmp & SDMA1_BUSY)
5186 		reset_mask |= RADEON_RESET_DMA1;
5187 
5188 	/* SRBM_STATUS */
5189 	tmp = RREG32(SRBM_STATUS);
5190 
5191 	if (tmp & IH_BUSY)
5192 		reset_mask |= RADEON_RESET_IH;
5193 
5194 	if (tmp & SEM_BUSY)
5195 		reset_mask |= RADEON_RESET_SEM;
5196 
5197 	if (tmp & GRBM_RQ_PENDING)
5198 		reset_mask |= RADEON_RESET_GRBM;
5199 
5200 	if (tmp & VMC_BUSY)
5201 		reset_mask |= RADEON_RESET_VMC;
5202 
5203 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5204 		   MCC_BUSY | MCD_BUSY))
5205 		reset_mask |= RADEON_RESET_MC;
5206 
5207 	if (evergreen_is_display_hung(rdev))
5208 		reset_mask |= RADEON_RESET_DISPLAY;
5209 
5210 	/* Skip MC reset as it's mostly likely not hung, just busy */
5211 	if (reset_mask & RADEON_RESET_MC) {
5212 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5213 		reset_mask &= ~RADEON_RESET_MC;
5214 	}
5215 
5216 	return reset_mask;
5217 }
5218 
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset (see cik_gpu_check_soft_reset())
 *
 * Soft reset the blocks specified in @reset_mask: halt the engines,
 * quiesce the memory controller, pulse the GRBM/SRBM soft reset bits,
 * then resume the MC.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce the memory controller before resetting anything */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* pulse the GRBM reset bits: set, settle, clear (reads flush writes) */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same pulse sequence for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5349 
/* GMCON register state saved across a KV/KB (IGP) pci config reset. */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5355 
/*
 * Save the GMCON registers into @save before a pci config reset,
 * then mask off the rengine-execute and stutter bits so the memory
 * controller is quiescent during the reset.  Paired with
 * kv_restore_regs_for_reset().
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	/* clear the execute/stutter enables while leaving other bits intact */
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5367 
5368 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5369 				      struct kv_reset_save_regs *save)
5370 {
5371 	int i;
5372 
5373 	WREG32(GMCON_PGFSM_WRITE, 0);
5374 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5375 
5376 	for (i = 0; i < 5; i++)
5377 		WREG32(GMCON_PGFSM_WRITE, 0);
5378 
5379 	WREG32(GMCON_PGFSM_WRITE, 0);
5380 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5381 
5382 	for (i = 0; i < 5; i++)
5383 		WREG32(GMCON_PGFSM_WRITE, 0);
5384 
5385 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5386 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5387 
5388 	for (i = 0; i < 5; i++)
5389 		WREG32(GMCON_PGFSM_WRITE, 0);
5390 
5391 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5392 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5393 
5394 	for (i = 0; i < 5; i++)
5395 		WREG32(GMCON_PGFSM_WRITE, 0);
5396 
5397 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5398 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5399 
5400 	for (i = 0; i < 5; i++)
5401 		WREG32(GMCON_PGFSM_WRITE, 0);
5402 
5403 	WREG32(GMCON_PGFSM_WRITE, 0);
5404 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5405 
5406 	for (i = 0; i < 5; i++)
5407 		WREG32(GMCON_PGFSM_WRITE, 0);
5408 
5409 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5410 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5411 
5412 	for (i = 0; i < 5; i++)
5413 		WREG32(GMCON_PGFSM_WRITE, 0);
5414 
5415 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5416 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5417 
5418 	for (i = 0; i < 5; i++)
5419 		WREG32(GMCON_PGFSM_WRITE, 0);
5420 
5421 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5422 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5423 
5424 	for (i = 0; i < 5; i++)
5425 		WREG32(GMCON_PGFSM_WRITE, 0);
5426 
5427 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5428 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5429 
5430 	for (i = 0; i < 5; i++)
5431 		WREG32(GMCON_PGFSM_WRITE, 0);
5432 
5433 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5434 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5435 
5436 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5437 	WREG32(GMCON_MISC, save->gmcon_misc);
5438 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5439 }
5440 
/*
 * Perform a full GPU reset through pci config space: halt every engine,
 * quiesce the MC, disable bus mastering, trigger the reset, then wait
 * for the ASIC to respond again.  On IGPs (KV/KB) the GMCON state is
 * saved and restored around the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_disable_busmaster(rdev->pdev->dev.bsddev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads back
	 * all-ones while the chip is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5503 
5504 /**
5505  * cik_asic_reset - soft reset GPU
5506  *
5507  * @rdev: radeon_device pointer
5508  *
5509  * Look up which blocks are hung and attempt
5510  * to reset them.
5511  * Returns 0 for success.
5512  */
5513 int cik_asic_reset(struct radeon_device *rdev)
5514 {
5515 	u32 reset_mask;
5516 
5517 	reset_mask = cik_gpu_check_soft_reset(rdev);
5518 
5519 	if (reset_mask)
5520 		r600_set_bios_scratch_engine_hung(rdev, true);
5521 
5522 	/* try soft reset */
5523 	cik_gpu_soft_reset(rdev, reset_mask);
5524 
5525 	reset_mask = cik_gpu_check_soft_reset(rdev);
5526 
5527 	/* try pci config reset */
5528 	if (reset_mask && radeon_hard_reset)
5529 		cik_gpu_pci_config_reset(rdev);
5530 
5531 	reset_mask = cik_gpu_check_soft_reset(rdev);
5532 
5533 	if (!reset_mask)
5534 		r600_set_bios_scratch_engine_hung(rdev, false);
5535 
5536 	return 0;
5537 }
5538 
5539 /**
5540  * cik_gfx_is_lockup - check if the 3D engine is locked up
5541  *
5542  * @rdev: radeon_device pointer
5543  * @ring: radeon_ring structure holding ring information
5544  *
5545  * Check if the 3D engine is locked up (CIK).
5546  * Returns true if the engine is locked, false if not.
5547  */
5548 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5549 {
5550 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5551 
5552 	if (!(reset_mask & (RADEON_RESET_GFX |
5553 			    RADEON_RESET_COMPUTE |
5554 			    RADEON_RESET_CP))) {
5555 		radeon_ring_lockup_update(rdev, ring);
5556 		return false;
5557 	}
5558 	return radeon_ring_test_lockup(rdev, ring);
5559 }
5560 
5561 /* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).  The MC is stopped while the
 * apertures are reprogrammed and resumed afterwards; VGA access
 * is locked out so it cannot scribble over driver objects.
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs the top and base of vram (in 16MB units) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5617 
5618 /**
5619  * cik_mc_init - initialize the memory controller driver params
5620  *
5621  * @rdev: radeon_device pointer
5622  *
5623  * Look up the amount of vram, vram width, and decide how to place
5624  * vram and gart within the GPU's physical address space (CIK).
5625  * Returns 0 for success.
5626  */
5627 static int cik_mc_init(struct radeon_device *rdev)
5628 {
5629 	u32 tmp;
5630 	int chansize, numchan;
5631 
5632 	/* Get VRAM informations */
5633 	rdev->mc.vram_is_ddr = true;
5634 	tmp = RREG32(MC_ARB_RAMCFG);
5635 	if (tmp & CHANSIZE_MASK) {
5636 		chansize = 64;
5637 	} else {
5638 		chansize = 32;
5639 	}
5640 	tmp = RREG32(MC_SHARED_CHMAP);
5641 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5642 	case 0:
5643 	default:
5644 		numchan = 1;
5645 		break;
5646 	case 1:
5647 		numchan = 2;
5648 		break;
5649 	case 2:
5650 		numchan = 4;
5651 		break;
5652 	case 3:
5653 		numchan = 8;
5654 		break;
5655 	case 4:
5656 		numchan = 3;
5657 		break;
5658 	case 5:
5659 		numchan = 6;
5660 		break;
5661 	case 6:
5662 		numchan = 10;
5663 		break;
5664 	case 7:
5665 		numchan = 12;
5666 		break;
5667 	case 8:
5668 		numchan = 16;
5669 		break;
5670 	}
5671 	rdev->mc.vram_width = numchan * chansize;
5672 	/* Could aper size report 0 ? */
5673 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5674 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5675 	/* size in MB on si */
5676 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5677 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5678 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5679 	si_vram_gtt_location(rdev, &rdev->mc);
5680 	radeon_update_bandwidth_info(rdev);
5681 
5682 	return 0;
5683 }
5684 
5685 /*
5686  * GART
5687  * VMID 0 is the physical GPU addresses as used by the kernel.
5688  * VMIDs 1-15 are used for userspace clients and are handled
5689  * by the radeon vm/hsa code.
5690  */
5691 /**
5692  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5693  *
5694  * @rdev: radeon_device pointer
5695  *
5696  * Flush the TLB for the VMID 0 page table (CIK).
5697  */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache first so any dirty page table writes reach memory
	 * before the TLB re-walks the tables */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; bit 0 = kernel context 0 */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5706 
5707 /**
5708  * cik_pcie_gart_enable - gart enable
5709  *
5710  * @rdev: radeon_device pointer
5711  *
5712  * This sets up the TLBs, programs the page tables for VMID0,
5713  * sets up the hw for VMIDs 1-15 which are allocated on
5714  * demand, and sets up the global locations for the LDS, GDS,
5715  * and GPUVM for FSA64 clients (CIK).
5716  * Returns 0 for success, errors for failure.
5717  */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	/* pin the page table in VRAM so its GPU address is stable */
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: kernel GART, flat-mapped over the GTT range */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 land on the dummy page instead of hanging */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers, cleared as in the upstream
	 * Linux driver — purpose unknown, keep as-is */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* contexts 1-7 and 8-15 have their base-address regs in two
	 * separate banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	/* 2-level page tables with interrupts on every fault class */
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		/* program the per-VMID SRBM-indexed registers */
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	/* back to VMID 0 before releasing the SRBM index */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5827 
5828 /**
5829  * cik_pcie_gart_disable - gart disable
5830  *
5831  * @rdev: radeon_device pointer
5832  *
5833  * This disables all VM page table (CIK).
5834  */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save the per-VMID page table bases so a later
	 * cik_pcie_gart_enable() can restore contexts 1-15 */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5866 
5867 /**
5868  * cik_pcie_gart_fini - vm fini callback
5869  *
5870  * @rdev: radeon_device pointer
5871  *
5872  * Tears down the driver GART/VM setup (CIK).
5873  */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* order matters: stop the hw walker first, then release the
	 * VRAM-resident table, then free the driver-side GART state */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5880 
5881 /* vm parser */
5882 /**
5883  * cik_ib_parse - vm ib_parse callback
5884  *
5885  * @rdev: radeon_device pointer
5886  * @ib: indirect buffer pointer
5887  *
5888  * CIK uses hw IB checking so this is a nop (CIK).
5889  */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* CIK relies on hardware IB validation; nothing to do here */
	return 0;
}
5894 
5895 /*
5896  * vm
5897  * VMID 0 is the physical GPU addresses as used by the kernel.
5898  * VMIDs 1-15 are used for userspace clients and are handled
5899  * by the radeon vm/hsa code.
5900  */
5901 /**
5902  * cik_vm_init - cik vm init callback
5903  *
5904  * @rdev: radeon_device pointer
5905  *
5906  * Inits cik specific vm parameters (number of VMs, base of vram for
5907  * VMIDs 1-15) (CIK).
5908  * Returns 0 for success.
5909  */
5910 int cik_vm_init(struct radeon_device *rdev)
5911 {
5912 	/* number of VMs */
5913 	rdev->vm_manager.nvm = 16;
5914 	/* base offset of vram pages */
5915 	if (rdev->flags & RADEON_IS_IGP) {
5916 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5917 		tmp <<= 22;
5918 		rdev->vm_manager.vram_base_offset = tmp;
5919 	} else
5920 		rdev->vm_manager.vram_base_offset = 0;
5921 
5922 	return 0;
5923 }
5924 
5925 /**
5926  * cik_vm_fini - cik vm fini callback
5927  *
5928  * @rdev: radeon_device pointer
5929  *
5930  * Tear down any asic specific VM setup (CIK).
5931  */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* no CIK-specific VM state to tear down */
}
5935 
5936 /**
5937  * cik_vm_decode_fault - print human readable fault info
5938  *
5939  * @rdev: radeon_device pointer
5940  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5941  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5942  *
5943  * Print human readable fault information (CIK).
5944  */
5945 static void cik_vm_decode_fault(struct radeon_device *rdev,
5946 				u32 status, u32 addr, u32 mc_client)
5947 {
5948 	u32 mc_id;
5949 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5950 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5951 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5952 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5953 
5954 	if (rdev->family == CHIP_HAWAII)
5955 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5956 	else
5957 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5958 
5959 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5960 	       protections, vmid, addr,
5961 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5962 	       block, mc_client, mc_id);
5963 }
5964 
5965 /**
5966  * cik_vm_flush - cik vm flush using the CP
5967  *
5968  * @rdev: radeon_device pointer
5969  *
5970  * Update the page table base and flush the VM TLB
5971  * using the CP (CIK).
5972  */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	/* only the GFX ring has a PFP; compute rings must use the ME */
	int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);

	if (vm == NULL)
		return;

	/* update this VMID's page directory base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	/* the base-address registers for VMIDs 1-7 and 8-15 live in
	 * two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	/* switch the SRBM index to this VMID before touching SH_MEM_* */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* restore the SRBM index to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ridx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
6038 
6039 /*
6040  * RLC
6041  * The RLC is a multi-purpose microengine that handles a
6042  * variety of functions, the most important of which is
6043  * the interrupt controller.
6044  */
6045 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6046 					  bool enable)
6047 {
6048 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6049 
6050 	if (enable)
6051 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6052 	else
6053 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6054 	WREG32(CP_INT_CNTL_RING0, tmp);
6055 }
6056 
6057 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6058 {
6059 	u32 tmp;
6060 
6061 	tmp = RREG32(RLC_LB_CNTL);
6062 	if (enable)
6063 		tmp |= LOAD_BALANCE_ENABLE;
6064 	else
6065 		tmp &= ~LOAD_BALANCE_ENABLE;
6066 	WREG32(RLC_LB_CNTL, tmp);
6067 }
6068 
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* poll every SE/SH combination until its CU serdes master idles
	 * (or the usec timeout expires) */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast SE/SH addressing */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* then wait for the non-CU masters (SE/GC/TC) to idle */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
6093 
6094 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6095 {
6096 	u32 tmp;
6097 
6098 	tmp = RREG32(RLC_CNTL);
6099 	if (tmp != rlc)
6100 		WREG32(RLC_CNTL, rlc);
6101 }
6102 
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		/* clear the enable bit, then wait for the GPM to drain */
		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	/* return the previous RLC_CNTL so the caller can restore it
	 * via cik_update_rlc() */
	return orig;
}
6126 
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* request safe mode from the RLC firmware */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait until the GFX block reports both power and clock status */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* then wait for the RLC to acknowledge (clear) the request bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
6147 
void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp;

	/* post the exit message; unlike entry, no acknowledgement is
	 * awaited here */
	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);
}
6155 
6156 /**
6157  * cik_rlc_stop - stop the RLC ME
6158  *
6159  * @rdev: radeon_device pointer
6160  *
6161  * Halt the RLC ME (MicroEngine) (CIK).
6162  */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	/* disable the RLC outright, mask GUI idle interrupts, then wait
	 * for the serdes masters to drain */
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	cik_wait_for_rlc_serdes(rdev);
}
6171 
6172 /**
6173  * cik_rlc_start - start the RLC ME
6174  *
6175  * @rdev: radeon_device pointer
6176  *
6177  * Unhalt the RLC ME (MicroEngine) (CIK).
6178  */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* give the RLC time to come up before it is used */
	udelay(50);
}
6187 
6188 /**
6189  * cik_rlc_resume - setup the RLC hw
6190  *
6191  * @rdev: radeon_device pointer
6192  *
6193  * Initialize the RLC registers, load the ucode,
6194  * and start the RLC (CIK).
6195  * Returns 0 for success, -EINVAL if the ucode is not available.
6196  */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	if (!rdev->rlc_fw)
		return -EINVAL;

	/* halt the RLC before touching its state */
	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	/* load-balancing counters */
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* broadcast to all SE/SH before programming LB parameters */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware carries a header describing the
		 * ucode offset, size and version */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			((const char *)rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: raw big-endian words of a known,
		 * per-family size */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
6277 
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while reprogramming the serdes */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		/* restore the previous RLC state */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): repeated read-backs, presumably to flush
		 * posted writes / allow CG state to settle — as upstream */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* avoid a redundant register write */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6313 
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		/* optional CP memory light sleep */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear the MGCG override (bit 1) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while reprogramming the serdes */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		/* restore the previous RLC state */
		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set the MGCG override (bit 1) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* force RLC and CP memories out of light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* halt the RLC while reprogramming the serdes */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
6392 
/* MC client clock/power gating control registers; iterated by
 * cik_enable_mc_ls() and cik_enable_mc_mgcg() below */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6405 
6406 static void cik_enable_mc_ls(struct radeon_device *rdev,
6407 			     bool enable)
6408 {
6409 	int i;
6410 	u32 orig, data;
6411 
6412 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6413 		orig = data = RREG32(mc_cg_registers[i]);
6414 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6415 			data |= MC_LS_ENABLE;
6416 		else
6417 			data &= ~MC_LS_ENABLE;
6418 		if (data != orig)
6419 			WREG32(mc_cg_registers[i], data);
6420 	}
6421 }
6422 
6423 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6424 			       bool enable)
6425 {
6426 	int i;
6427 	u32 orig, data;
6428 
6429 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6430 		orig = data = RREG32(mc_cg_registers[i]);
6431 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6432 			data |= MC_CG_ENABLE;
6433 		else
6434 			data &= ~MC_CG_ENABLE;
6435 		if (data != orig)
6436 			WREG32(mc_cg_registers[i], data);
6437 	}
6438 }
6439 
6440 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6441 				 bool enable)
6442 {
6443 	u32 orig, data;
6444 
6445 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6446 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6447 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6448 	} else {
6449 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6450 		data |= 0xff000000;
6451 		if (data != orig)
6452 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6453 
6454 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6455 		data |= 0xff000000;
6456 		if (data != orig)
6457 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6458 	}
6459 }
6460 
6461 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6462 				 bool enable)
6463 {
6464 	u32 orig, data;
6465 
6466 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6467 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6468 		data |= 0x100;
6469 		if (orig != data)
6470 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6471 
6472 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6473 		data |= 0x100;
6474 		if (orig != data)
6475 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6476 	} else {
6477 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6478 		data &= ~0x100;
6479 		if (orig != data)
6480 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6481 
6482 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6483 		data &= ~0x100;
6484 		if (orig != data)
6485 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6486 	}
6487 }
6488 
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the value read here is immediately overwritten
		 * by the constant 0xfff below; the read may only matter for
		 * its indexed-access side effect — matches upstream Linux,
		 * keep as-is */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the low 12 gating bits and disable dynamic clocking */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6514 
6515 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6516 			       bool enable)
6517 {
6518 	u32 orig, data;
6519 
6520 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6521 
6522 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6523 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6524 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6525 	else
6526 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6527 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6528 
6529 	if (orig != data)
6530 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6531 }
6532 
6533 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6534 				bool enable)
6535 {
6536 	u32 orig, data;
6537 
6538 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6539 
6540 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6541 		data &= ~CLOCK_GATING_DIS;
6542 	else
6543 		data |= CLOCK_GATING_DIS;
6544 
6545 	if (orig != data)
6546 		WREG32(HDP_HOST_PATH_CNTL, data);
6547 }
6548 
6549 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6550 			      bool enable)
6551 {
6552 	u32 orig, data;
6553 
6554 	orig = data = RREG32(HDP_MEM_POWER_LS);
6555 
6556 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6557 		data |= HDP_LS_ENABLE;
6558 	else
6559 		data &= ~HDP_LS_ENABLE;
6560 
6561 	if (orig != data)
6562 		WREG32(HDP_MEM_POWER_LS, data);
6563 }
6564 
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		/* GUI idle interrupts are masked while GFX CG is toggled */
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! MGCG before CGCG on enable, reversed
		 * on disable */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating is only programmed on discrete parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6612 
static void cik_init_cg(struct radeon_device *rdev)
{

	/* GFX first (its internal ordering is handled by cik_update_cg) */
	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	/* then the remaining blocks in one pass */
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6627 
static void cik_fini_cg(struct radeon_device *rdev)
{
	/* reverse of cik_init_cg(): non-GFX blocks first, GFX last */
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6638 
6639 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6640 					  bool enable)
6641 {
6642 	u32 data, orig;
6643 
6644 	orig = data = RREG32(RLC_PG_CNTL);
6645 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6646 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6647 	else
6648 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6649 	if (orig != data)
6650 		WREG32(RLC_PG_CNTL, data);
6651 }
6652 
6653 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6654 					  bool enable)
6655 {
6656 	u32 data, orig;
6657 
6658 	orig = data = RREG32(RLC_PG_CNTL);
6659 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6660 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6661 	else
6662 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6663 	if (orig != data)
6664 		WREG32(RLC_PG_CNTL, data);
6665 }
6666 
6667 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6668 {
6669 	u32 data, orig;
6670 
6671 	orig = data = RREG32(RLC_PG_CNTL);
6672 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6673 		data &= ~DISABLE_CP_PG;
6674 	else
6675 		data |= DISABLE_CP_PG;
6676 	if (orig != data)
6677 		WREG32(RLC_PG_CNTL, data);
6678 }
6679 
6680 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6681 {
6682 	u32 data, orig;
6683 
6684 	orig = data = RREG32(RLC_PG_CNTL);
6685 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6686 		data &= ~DISABLE_GDS_PG;
6687 	else
6688 		data |= DISABLE_GDS_PG;
6689 	if (orig != data)
6690 		WREG32(RLC_PG_CNTL, data);
6691 }
6692 
6693 #define CP_ME_TABLE_SIZE    96
6694 #define CP_ME_TABLE_OFFSET  2048
6695 #define CP_MEC_TABLE_OFFSET 4096
6696 
6697 void cik_init_cp_pg_table(struct radeon_device *rdev)
6698 {
6699 	volatile u32 *dst_ptr;
6700 	int me, i, max_me = 4;
6701 	u32 bo_offset = 0;
6702 	u32 table_offset, table_size;
6703 
6704 	if (rdev->family == CHIP_KAVERI)
6705 		max_me = 5;
6706 
6707 	if (rdev->rlc.cp_table_ptr == NULL)
6708 		return;
6709 
6710 	/* write the cp table buffer */
6711 	dst_ptr = rdev->rlc.cp_table_ptr;
6712 	for (me = 0; me < max_me; me++) {
6713 		if (rdev->new_fw) {
6714 			const __le32 *fw_data;
6715 			const struct gfx_firmware_header_v1_0 *hdr;
6716 
6717 			if (me == 0) {
6718 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6719 				fw_data = (const __le32 *)
6720 					((const char *)rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6721 				table_offset = le32_to_cpu(hdr->jt_offset);
6722 				table_size = le32_to_cpu(hdr->jt_size);
6723 			} else if (me == 1) {
6724 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6725 				fw_data = (const __le32 *)
6726 					((const char *)rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6727 				table_offset = le32_to_cpu(hdr->jt_offset);
6728 				table_size = le32_to_cpu(hdr->jt_size);
6729 			} else if (me == 2) {
6730 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6731 				fw_data = (const __le32 *)
6732 					((const char *)rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6733 				table_offset = le32_to_cpu(hdr->jt_offset);
6734 				table_size = le32_to_cpu(hdr->jt_size);
6735 			} else if (me == 3) {
6736 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6737 				fw_data = (const __le32 *)
6738 					((const char *)rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6739 				table_offset = le32_to_cpu(hdr->jt_offset);
6740 				table_size = le32_to_cpu(hdr->jt_size);
6741 			} else {
6742 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6743 				fw_data = (const __le32 *)
6744 					((const char *)rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6745 				table_offset = le32_to_cpu(hdr->jt_offset);
6746 				table_size = le32_to_cpu(hdr->jt_size);
6747 			}
6748 
6749 			for (i = 0; i < table_size; i ++) {
6750 				dst_ptr[bo_offset + i] =
6751 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6752 			}
6753 			bo_offset += table_size;
6754 		} else {
6755 			const __be32 *fw_data;
6756 			table_size = CP_ME_TABLE_SIZE;
6757 
6758 			if (me == 0) {
6759 				fw_data = (const __be32 *)rdev->ce_fw->data;
6760 				table_offset = CP_ME_TABLE_OFFSET;
6761 			} else if (me == 1) {
6762 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6763 				table_offset = CP_ME_TABLE_OFFSET;
6764 			} else if (me == 2) {
6765 				fw_data = (const __be32 *)rdev->me_fw->data;
6766 				table_offset = CP_ME_TABLE_OFFSET;
6767 			} else {
6768 				fw_data = (const __be32 *)rdev->mec_fw->data;
6769 				table_offset = CP_MEC_TABLE_OFFSET;
6770 			}
6771 
6772 			for (i = 0; i < table_size; i ++) {
6773 				dst_ptr[bo_offset + i] =
6774 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6775 			}
6776 			bo_offset += table_size;
6777 		}
6778 	}
6779 }
6780 
/**
 * cik_enable_gfx_cgpg - toggle coarse-grain GFX powergating
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * Sets/clears GFX_PG_ENABLE in RLC_PG_CNTL and AUTO_PG_EN in
 * RLC_AUTO_PG_CTRL, writing each register only if its value changed.
 * Enabling also requires RADEON_PG_SUPPORT_GFX_PG in pg_flags.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result is discarded — presumably a dummy
		 * read to flush/settle the disable; confirm against the
		 * CIK programming docs before removing. */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6810 
6811 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6812 {
6813 	u32 mask = 0, tmp, tmp1;
6814 	int i;
6815 
6816 	cik_select_se_sh(rdev, se, sh);
6817 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6818 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6819 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6820 
6821 	tmp &= 0xffff0000;
6822 
6823 	tmp |= tmp1;
6824 	tmp >>= 16;
6825 
6826 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6827 		mask <<= 1;
6828 		mask |= 1;
6829 	}
6830 
6831 	return (~tmp) & mask;
6832 }
6833 
6834 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6835 {
6836 	u32 i, j, k, active_cu_number = 0;
6837 	u32 mask, counter, cu_bitmap;
6838 	u32 tmp = 0;
6839 
6840 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6841 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6842 			mask = 1;
6843 			cu_bitmap = 0;
6844 			counter = 0;
6845 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6846 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6847 					if (counter < 2)
6848 						cu_bitmap |= mask;
6849 					counter ++;
6850 				}
6851 				mask <<= 1;
6852 			}
6853 
6854 			active_cu_number += counter;
6855 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6856 		}
6857 	}
6858 
6859 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6860 
6861 	tmp = RREG32(RLC_MAX_PG_CU);
6862 	tmp &= ~MAX_PU_CU_MASK;
6863 	tmp |= MAX_PU_CU(active_cu_number);
6864 	WREG32(RLC_MAX_PG_CU, tmp);
6865 }
6866 
6867 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6868 				       bool enable)
6869 {
6870 	u32 data, orig;
6871 
6872 	orig = data = RREG32(RLC_PG_CNTL);
6873 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6874 		data |= STATIC_PER_CU_PG_ENABLE;
6875 	else
6876 		data &= ~STATIC_PER_CU_PG_ENABLE;
6877 	if (orig != data)
6878 		WREG32(RLC_PG_CNTL, data);
6879 }
6880 
6881 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6882 					bool enable)
6883 {
6884 	u32 data, orig;
6885 
6886 	orig = data = RREG32(RLC_PG_CNTL);
6887 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6888 		data |= DYN_PER_CU_PG_ENABLE;
6889 	else
6890 		data &= ~DYN_PER_CU_PG_ENABLE;
6891 	if (orig != data)
6892 		WREG32(RLC_PG_CNTL, data);
6893 }
6894 
6895 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6896 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6897 
/**
 * cik_init_gfx_cgpg - one-time setup for GFX coarse-grain powergating
 * @rdev: radeon_device pointer
 *
 * Seeds the RLC GPM scratch area with the clear-state descriptor and
 * save/restore register list, points the RLC at the save/restore and
 * CP-table buffers, and programs the powergating delay/idle-poll
 * parameters.  The register sequence is order sensitive.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* publish the clear-state buffer address/size via the
		 * descriptor slot in RLC scratch */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear-state buffer: zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save-and-restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* addresses are programmed in units of 256 bytes (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6946 
/**
 * cik_update_gfx_pg - toggle all GFX powergating features together
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * Applies coarse-grain, static medium-grain and dynamic medium-grain
 * powergating in that order; each helper checks its own pg_flags bit.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6953 
6954 u32 cik_get_csb_size(struct radeon_device *rdev)
6955 {
6956 	u32 count = 0;
6957 	const struct cs_section_def *sect = NULL;
6958 	const struct cs_extent_def *ext = NULL;
6959 
6960 	if (rdev->rlc.cs_data == NULL)
6961 		return 0;
6962 
6963 	/* begin clear state */
6964 	count += 2;
6965 	/* context control state */
6966 	count += 3;
6967 
6968 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6969 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6970 			if (sect->id == SECT_CONTEXT)
6971 				count += 2 + ext->reg_count;
6972 			else
6973 				return 0;
6974 		}
6975 	}
6976 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6977 	count += 4;
6978 	/* end clear state */
6979 	count += 2;
6980 	/* clear state */
6981 	count += 2;
6982 
6983 	return count;
6984 }
6985 
6986 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6987 {
6988 	u32 count = 0, i;
6989 	const struct cs_section_def *sect = NULL;
6990 	const struct cs_extent_def *ext = NULL;
6991 
6992 	if (rdev->rlc.cs_data == NULL)
6993 		return;
6994 	if (buffer == NULL)
6995 		return;
6996 
6997 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6998 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6999 
7000 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7001 	buffer[count++] = cpu_to_le32(0x80000000);
7002 	buffer[count++] = cpu_to_le32(0x80000000);
7003 
7004 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7005 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7006 			if (sect->id == SECT_CONTEXT) {
7007 				buffer[count++] =
7008 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7009 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7010 				for (i = 0; i < ext->reg_count; i++)
7011 					buffer[count++] = cpu_to_le32(ext->extent[i]);
7012 			} else {
7013 				return;
7014 			}
7015 		}
7016 	}
7017 
7018 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7019 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7020 	switch (rdev->family) {
7021 	case CHIP_BONAIRE:
7022 		buffer[count++] = cpu_to_le32(0x16000012);
7023 		buffer[count++] = cpu_to_le32(0x00000000);
7024 		break;
7025 	case CHIP_KAVERI:
7026 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7027 		buffer[count++] = cpu_to_le32(0x00000000);
7028 		break;
7029 	case CHIP_KABINI:
7030 	case CHIP_MULLINS:
7031 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7032 		buffer[count++] = cpu_to_le32(0x00000000);
7033 		break;
7034 	case CHIP_HAWAII:
7035 		buffer[count++] = cpu_to_le32(0x3a00161a);
7036 		buffer[count++] = cpu_to_le32(0x0000002e);
7037 		break;
7038 	default:
7039 		buffer[count++] = cpu_to_le32(0x00000000);
7040 		buffer[count++] = cpu_to_le32(0x00000000);
7041 		break;
7042 	}
7043 
7044 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7045 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7046 
7047 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7048 	buffer[count++] = cpu_to_le32(0);
7049 }
7050 
7051 static void cik_init_pg(struct radeon_device *rdev)
7052 {
7053 	if (rdev->pg_flags) {
7054 		cik_enable_sck_slowdown_on_pu(rdev, true);
7055 		cik_enable_sck_slowdown_on_pd(rdev, true);
7056 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7057 			cik_init_gfx_cgpg(rdev);
7058 			cik_enable_cp_pg(rdev, true);
7059 			cik_enable_gds_pg(rdev, true);
7060 		}
7061 		cik_init_ao_cu_mask(rdev);
7062 		cik_update_gfx_pg(rdev, true);
7063 	}
7064 }
7065 
7066 static void cik_fini_pg(struct radeon_device *rdev)
7067 {
7068 	if (rdev->pg_flags) {
7069 		cik_update_gfx_pg(rdev, false);
7070 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7071 			cik_enable_cp_pg(rdev, false);
7072 			cik_enable_gds_pg(rdev, false);
7073 		}
7074 	}
7075 }
7076 
7077 /*
7078  * Interrupts
7079  * Starting with r6xx, interrupts are handled via a ring buffer.
7080  * Ring buffers are areas of GPU accessible memory that the GPU
7081  * writes interrupt vectors into and the host reads vectors out of.
7082  * There is a rptr (read pointer) that determines where the
7083  * host is currently reading, and a wptr (write pointer)
7084  * which determines where the GPU has written.  When the
7085  * pointers are equal, the ring is idle.  When the GPU
7086  * writes vectors to the ring buffer, it increments the
7087  * wptr.  When there is an interrupt, the host then starts
7088  * fetching commands and processing them until the pointers are
7089  * equal again at which point it updates the rptr.
7090  */
7091 
7092 /**
7093  * cik_enable_interrupts - Enable the interrupt ring buffer
7094  *
7095  * @rdev: radeon_device pointer
7096  *
7097  * Enable the interrupt ring buffer (CIK).
7098  */
7099 static void cik_enable_interrupts(struct radeon_device *rdev)
7100 {
7101 	u32 ih_cntl = RREG32(IH_CNTL);
7102 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7103 
7104 	ih_cntl |= ENABLE_INTR;
7105 	ih_rb_cntl |= IH_RB_ENABLE;
7106 	WREG32(IH_CNTL, ih_cntl);
7107 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7108 	rdev->ih.enabled = true;
7109 }
7110 
7111 /**
7112  * cik_disable_interrupts - Disable the interrupt ring buffer
7113  *
7114  * @rdev: radeon_device pointer
7115  *
7116  * Disable the interrupt ring buffer (CIK).
7117  */
7118 static void cik_disable_interrupts(struct radeon_device *rdev)
7119 {
7120 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7121 	u32 ih_cntl = RREG32(IH_CNTL);
7122 
7123 	ih_rb_cntl &= ~IH_RB_ENABLE;
7124 	ih_cntl &= ~ENABLE_INTR;
7125 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7126 	WREG32(IH_CNTL, ih_cntl);
7127 	/* set rptr, wptr to 0 */
7128 	WREG32(IH_RB_RPTR, 0);
7129 	WREG32(IH_RB_WPTR, 0);
7130 	rdev->ih.enabled = false;
7131 	rdev->ih.rptr = 0;
7132 }
7133 
7134 /**
7135  * cik_disable_interrupt_state - Disable all interrupt sources
7136  *
7137  * @rdev: radeon_device pointer
7138  *
7139  * Clear all interrupt enable bits used by the driver (CIK).
7140  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring - keep only the context busy/empty bits */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma - mask the trap interrupt on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues - clear all pipe interrupt controls on both MEs */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. - only touch crtcs this asic actually has */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug - drop the enable bit but preserve the
	 * configured interrupt polarity */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7208 
7209 /**
7210  * cik_irq_init - init and enable the interrupt ring
7211  *
7212  * @rdev: radeon_device pointer
7213  *
7214  * Allocate a ring buffer for the interrupt controller,
7215  * enable the RLC, disable interrupts, enable the IH
7216  * ring buffer and enable it (CIK).
 * Called at device load and resume.
7218  * Returns 0 for success, errors for failure.
7219  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* undo the ring allocation on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	/* ring base is in units of 256 bytes */
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	/* DragonFly: make sure the device can master the bus before irqs fire */
	pci_enable_busmaster(rdev->pdev->dev.bsddev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7290 
7291 /**
7292  * cik_irq_set - enable/disable interrupt sources
7293  *
7294  * @rdev: radeon_device pointer
7295  *
7296  * Enable interrupt sources on the GPU (vblanks, hpd,
7297  * etc.) (CIK).
7298  * Returns 0 for success, errors for failure.
7299  */
7300 int cik_irq_set(struct radeon_device *rdev)
7301 {
7302 	u32 cp_int_cntl;
7303 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7304 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7305 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7306 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7307 	u32 grbm_int_cntl = 0;
7308 	u32 dma_cntl, dma_cntl1;
7309 	u32 thermal_int;
7310 
7311 	if (!rdev->irq.installed) {
7312 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7313 		return -EINVAL;
7314 	}
7315 	/* don't enable anything if the ih is disabled */
7316 	if (!rdev->ih.enabled) {
7317 		cik_disable_interrupts(rdev);
7318 		/* force the active interrupt state to all disabled */
7319 		cik_disable_interrupt_state(rdev);
7320 		return 0;
7321 	}
7322 
7323 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7324 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7325 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7326 
7327 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
7328 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
7329 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
7330 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
7331 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
7332 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
7333 
7334 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7335 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7336 
7337 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7338 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7339 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7340 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7341 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7342 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7343 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7344 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7345 
7346 	if (rdev->flags & RADEON_IS_IGP)
7347 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
7348 			~(THERM_INTH_MASK | THERM_INTL_MASK);
7349 	else
7350 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
7351 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
7352 
7353 	/* enable CP interrupts on all rings */
7354 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7355 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7356 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7357 	}
7358 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7359 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7360 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7361 		if (ring->me == 1) {
7362 			switch (ring->pipe) {
7363 			case 0:
7364 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7365 				break;
7366 			case 1:
7367 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7368 				break;
7369 			case 2:
7370 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7371 				break;
7372 			case 3:
7373 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7374 				break;
7375 			default:
7376 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7377 				break;
7378 			}
7379 		} else if (ring->me == 2) {
7380 			switch (ring->pipe) {
7381 			case 0:
7382 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7383 				break;
7384 			case 1:
7385 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7386 				break;
7387 			case 2:
7388 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7389 				break;
7390 			case 3:
7391 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7392 				break;
7393 			default:
7394 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7395 				break;
7396 			}
7397 		} else {
7398 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7399 		}
7400 	}
7401 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7402 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7403 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7404 		if (ring->me == 1) {
7405 			switch (ring->pipe) {
7406 			case 0:
7407 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7408 				break;
7409 			case 1:
7410 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7411 				break;
7412 			case 2:
7413 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7414 				break;
7415 			case 3:
7416 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7417 				break;
7418 			default:
7419 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7420 				break;
7421 			}
7422 		} else if (ring->me == 2) {
7423 			switch (ring->pipe) {
7424 			case 0:
7425 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7426 				break;
7427 			case 1:
7428 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7429 				break;
7430 			case 2:
7431 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7432 				break;
7433 			case 3:
7434 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7435 				break;
7436 			default:
7437 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7438 				break;
7439 			}
7440 		} else {
7441 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7442 		}
7443 	}
7444 
7445 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7446 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7447 		dma_cntl |= TRAP_ENABLE;
7448 	}
7449 
7450 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7451 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7452 		dma_cntl1 |= TRAP_ENABLE;
7453 	}
7454 
7455 	if (rdev->irq.crtc_vblank_int[0] ||
7456 	    atomic_read(&rdev->irq.pflip[0])) {
7457 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7458 		crtc1 |= VBLANK_INTERRUPT_MASK;
7459 	}
7460 	if (rdev->irq.crtc_vblank_int[1] ||
7461 	    atomic_read(&rdev->irq.pflip[1])) {
7462 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7463 		crtc2 |= VBLANK_INTERRUPT_MASK;
7464 	}
7465 	if (rdev->irq.crtc_vblank_int[2] ||
7466 	    atomic_read(&rdev->irq.pflip[2])) {
7467 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7468 		crtc3 |= VBLANK_INTERRUPT_MASK;
7469 	}
7470 	if (rdev->irq.crtc_vblank_int[3] ||
7471 	    atomic_read(&rdev->irq.pflip[3])) {
7472 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7473 		crtc4 |= VBLANK_INTERRUPT_MASK;
7474 	}
7475 	if (rdev->irq.crtc_vblank_int[4] ||
7476 	    atomic_read(&rdev->irq.pflip[4])) {
7477 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7478 		crtc5 |= VBLANK_INTERRUPT_MASK;
7479 	}
7480 	if (rdev->irq.crtc_vblank_int[5] ||
7481 	    atomic_read(&rdev->irq.pflip[5])) {
7482 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7483 		crtc6 |= VBLANK_INTERRUPT_MASK;
7484 	}
7485 	if (rdev->irq.hpd[0]) {
7486 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7487 		hpd1 |= DC_HPDx_INT_EN;
7488 	}
7489 	if (rdev->irq.hpd[1]) {
7490 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7491 		hpd2 |= DC_HPDx_INT_EN;
7492 	}
7493 	if (rdev->irq.hpd[2]) {
7494 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7495 		hpd3 |= DC_HPDx_INT_EN;
7496 	}
7497 	if (rdev->irq.hpd[3]) {
7498 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7499 		hpd4 |= DC_HPDx_INT_EN;
7500 	}
7501 	if (rdev->irq.hpd[4]) {
7502 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7503 		hpd5 |= DC_HPDx_INT_EN;
7504 	}
7505 	if (rdev->irq.hpd[5]) {
7506 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7507 		hpd6 |= DC_HPDx_INT_EN;
7508 	}
7509 
7510 	if (rdev->irq.dpm_thermal) {
7511 		DRM_DEBUG("dpm thermal\n");
7512 		if (rdev->flags & RADEON_IS_IGP)
7513 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7514 		else
7515 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7516 	}
7517 
7518 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7519 
7520 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7521 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7522 
7523 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7524 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7525 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7526 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7527 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7528 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7529 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7530 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7531 
7532 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7533 
7534 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7535 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7536 	if (rdev->num_crtc >= 4) {
7537 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7538 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7539 	}
7540 	if (rdev->num_crtc >= 6) {
7541 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7542 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7543 	}
7544 
7545 	if (rdev->num_crtc >= 2) {
7546 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7547 		       GRPH_PFLIP_INT_MASK);
7548 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7549 		       GRPH_PFLIP_INT_MASK);
7550 	}
7551 	if (rdev->num_crtc >= 4) {
7552 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7553 		       GRPH_PFLIP_INT_MASK);
7554 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7555 		       GRPH_PFLIP_INT_MASK);
7556 	}
7557 	if (rdev->num_crtc >= 6) {
7558 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7559 		       GRPH_PFLIP_INT_MASK);
7560 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7561 		       GRPH_PFLIP_INT_MASK);
7562 	}
7563 
7564 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7565 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7566 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7567 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7568 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7569 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7570 
7571 	if (rdev->flags & RADEON_IS_IGP)
7572 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7573 	else
7574 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7575 
7576 	return 0;
7577 }
7578 
7579 /**
7580  * cik_irq_ack - ack interrupt sources
7581  *
7582  * @rdev: radeon_device pointer
7583  *
7584  * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
7586  * generated and do not require an explicit ack.
7587  */
7588 static inline void cik_irq_ack(struct radeon_device *rdev)
7589 {
7590 	u32 tmp;
7591 
7592 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7593 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7594 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7595 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7596 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7597 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7598 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7599 
7600 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7601 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7602 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7603 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7604 	if (rdev->num_crtc >= 4) {
7605 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7606 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7607 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7608 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7609 	}
7610 	if (rdev->num_crtc >= 6) {
7611 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7612 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7613 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7614 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7615 	}
7616 
7617 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7618 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7619 		       GRPH_PFLIP_INT_CLEAR);
7620 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7621 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7622 		       GRPH_PFLIP_INT_CLEAR);
7623 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7624 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7625 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7626 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7627 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7628 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7629 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7630 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7631 
7632 	if (rdev->num_crtc >= 4) {
7633 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7634 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7635 			       GRPH_PFLIP_INT_CLEAR);
7636 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7637 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7638 			       GRPH_PFLIP_INT_CLEAR);
7639 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7640 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7641 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7642 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7643 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7644 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7645 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7646 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7647 	}
7648 
7649 	if (rdev->num_crtc >= 6) {
7650 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7651 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7652 			       GRPH_PFLIP_INT_CLEAR);
7653 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7654 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7655 			       GRPH_PFLIP_INT_CLEAR);
7656 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7657 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7658 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7659 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7660 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7661 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7662 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7663 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7664 	}
7665 
7666 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7667 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7668 		tmp |= DC_HPDx_INT_ACK;
7669 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7670 	}
7671 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7672 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7673 		tmp |= DC_HPDx_INT_ACK;
7674 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7675 	}
7676 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7677 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7678 		tmp |= DC_HPDx_INT_ACK;
7679 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7680 	}
7681 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7682 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7683 		tmp |= DC_HPDx_INT_ACK;
7684 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7685 	}
7686 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7687 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7688 		tmp |= DC_HPDx_INT_ACK;
7689 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7690 	}
7691 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7692 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7693 		tmp |= DC_HPDx_INT_ACK;
7694 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7695 	}
7696 }
7697 
7698 /**
7699  * cik_irq_disable - disable interrupts
7700  *
7701  * @rdev: radeon_device pointer
7702  *
7703  * Disable interrupts on the hw (CIK).
7704  */
7705 static void cik_irq_disable(struct radeon_device *rdev)
7706 {
7707 	cik_disable_interrupts(rdev);
7708 	/* Wait and acknowledge irq */
7709 	mdelay(1);
7710 	cik_irq_ack(rdev);
7711 	cik_disable_interrupt_state(rdev);
7712 }
7713 
7714 /**
7715  * cik_irq_disable - disable interrupts for suspend
7716  *
7717  * @rdev: radeon_device pointer
7718  *
7719  * Disable interrupts and stop the RLC (CIK).
7720  * Used for suspend.
7721  */
7722 static void cik_irq_suspend(struct radeon_device *rdev)
7723 {
7724 	cik_irq_disable(rdev);
7725 	cik_rlc_stop(rdev);
7726 }
7727 
7728 /**
7729  * cik_irq_fini - tear down interrupt support
7730  *
7731  * @rdev: radeon_device pointer
7732  *
7733  * Disable interrupts on the hw and free the IH ring
7734  * buffer (CIK).
7735  * Used for driver unload.
7736  */
7737 static void cik_irq_fini(struct radeon_device *rdev)
7738 {
7739 	cik_irq_suspend(rdev);
7740 	r600_ih_ring_fini(rdev);
7741 }
7742 
7743 /**
7744  * cik_get_ih_wptr - get the IH ring buffer wptr
7745  *
7746  * @rdev: radeon_device pointer
7747  *
7748  * Get the IH ring buffer wptr from either the register
7749  * or the writeback memory buffer (CIK).  Also check for
7750  * ring buffer overflow and deal with it.
7751  * Used by cik_irq_process().
7752  * Returns the value of the wptr.
7753  */
7754 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7755 {
7756 	u32 wptr, tmp;
7757 
7758 	if (rdev->wb.enabled)
7759 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7760 	else
7761 		wptr = RREG32(IH_RB_WPTR);
7762 
7763 	if (wptr & RB_OVERFLOW) {
7764 		wptr &= ~RB_OVERFLOW;
7765 		/* When a ring buffer overflow happen start parsing interrupt
7766 		 * from the last not overwritten vector (wptr + 16). Hopefully
7767 		 * this should allow us to catchup.
7768 		 */
7769 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7770 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7771 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7772 		tmp = RREG32(IH_RB_CNTL);
7773 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7774 		WREG32(IH_RB_CNTL, tmp);
7775 	}
7776 	return (wptr & rdev->ih.ptr_mask);
7777 }
7778 
7779 /*        CIK IV Ring
7780  * Each IV ring entry is 128 bits:
7781  * [7:0]    - interrupt source id
7782  * [31:8]   - reserved
7783  * [59:32]  - interrupt source data
7784  * [63:60]  - reserved
7785  * [71:64]  - RINGID
7786  *            CP:
7787  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7788  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7789  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7790  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7791  *            PIPE_ID - ME0 0=3D
7792  *                    - ME1&2 compute dispatcher (4 pipes each)
7793  *            SDMA:
7794  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7795  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7796  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7797  * [79:72]  - VMID
7798  * [95:80]  - PASID
7799  * [127:96] - reserved
7800  */
7801 /**
7802  * cik_irq_process - interrupt handler
7803  *
7804  * @rdev: radeon_device pointer
7805  *
7806  * Interrupt hander (CIK).  Walk the IH ring,
7807  * ack interrupts and schedule work to handle
7808  * interrupt events.
7809  * Returns irq process return code.
7810  */
7811 irqreturn_t cik_irq_process(struct radeon_device *rdev)
7812 {
7813 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7814 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7815 	u32 wptr;
7816 	u32 rptr;
7817 	u32 src_id, src_data, ring_id;
7818 	u8 me_id, pipe_id, queue_id;
7819 	u32 ring_index;
7820 	bool queue_hotplug = false;
7821 	bool queue_reset = false;
7822 	u32 addr, status, mc_client;
7823 	bool queue_thermal = false;
7824 
7825 	if (!rdev->ih.enabled || rdev->shutdown)
7826 		return IRQ_NONE;
7827 
7828 	wptr = cik_get_ih_wptr(rdev);
7829 
7830 restart_ih:
7831 	/* is somebody else already processing irqs? */
7832 	if (atomic_xchg(&rdev->ih.lock, 1))
7833 		return IRQ_NONE;
7834 
7835 	rptr = rdev->ih.rptr;
7836 	DRM_DEBUG_VBLANK("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7837 
7838 	/* Order reading of wptr vs. reading of IH ring data */
7839 	rmb();
7840 
7841 	/* display interrupts */
7842 	cik_irq_ack(rdev);
7843 
7844 	while (rptr != wptr) {
7845 		/* wptr/rptr are in bytes! */
7846 		ring_index = rptr / 4;
7847 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7848 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7849 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7850 
7851 		switch (src_id) {
7852 		case 1: /* D1 vblank/vline */
7853 			switch (src_data) {
7854 			case 0: /* D1 vblank */
7855 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7856 					if (rdev->irq.crtc_vblank_int[0]) {
7857 						drm_handle_vblank(rdev->ddev, 0);
7858 						rdev->pm.vblank_sync = true;
7859 						wake_up(&rdev->irq.vblank_queue);
7860 					}
7861 					if (atomic_read(&rdev->irq.pflip[0]))
7862 						radeon_crtc_handle_vblank(rdev, 0);
7863 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7864 					DRM_DEBUG_VBLANK("IH: D1 vblank\n");
7865 				}
7866 				break;
7867 			case 1: /* D1 vline */
7868 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7869 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7870 					DRM_DEBUG_VBLANK("IH: D1 vline\n");
7871 				}
7872 				break;
7873 			default:
7874 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7875 				break;
7876 			}
7877 			break;
7878 		case 2: /* D2 vblank/vline */
7879 			switch (src_data) {
7880 			case 0: /* D2 vblank */
7881 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7882 					if (rdev->irq.crtc_vblank_int[1]) {
7883 						drm_handle_vblank(rdev->ddev, 1);
7884 						rdev->pm.vblank_sync = true;
7885 						wake_up(&rdev->irq.vblank_queue);
7886 					}
7887 					if (atomic_read(&rdev->irq.pflip[1]))
7888 						radeon_crtc_handle_vblank(rdev, 1);
7889 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7890 					DRM_DEBUG_VBLANK("IH: D2 vblank\n");
7891 				}
7892 				break;
7893 			case 1: /* D2 vline */
7894 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7895 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7896 					DRM_DEBUG_VBLANK("IH: D2 vline\n");
7897 				}
7898 				break;
7899 			default:
7900 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7901 				break;
7902 			}
7903 			break;
7904 		case 3: /* D3 vblank/vline */
7905 			switch (src_data) {
7906 			case 0: /* D3 vblank */
7907 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7908 					if (rdev->irq.crtc_vblank_int[2]) {
7909 						drm_handle_vblank(rdev->ddev, 2);
7910 						rdev->pm.vblank_sync = true;
7911 						wake_up(&rdev->irq.vblank_queue);
7912 					}
7913 					if (atomic_read(&rdev->irq.pflip[2]))
7914 						radeon_crtc_handle_vblank(rdev, 2);
7915 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7916 					DRM_DEBUG_VBLANK("IH: D3 vblank\n");
7917 				}
7918 				break;
7919 			case 1: /* D3 vline */
7920 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7921 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7922 					DRM_DEBUG_VBLANK("IH: D3 vline\n");
7923 				}
7924 				break;
7925 			default:
7926 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7927 				break;
7928 			}
7929 			break;
7930 		case 4: /* D4 vblank/vline */
7931 			switch (src_data) {
7932 			case 0: /* D4 vblank */
7933 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7934 					if (rdev->irq.crtc_vblank_int[3]) {
7935 						drm_handle_vblank(rdev->ddev, 3);
7936 						rdev->pm.vblank_sync = true;
7937 						wake_up(&rdev->irq.vblank_queue);
7938 					}
7939 					if (atomic_read(&rdev->irq.pflip[3]))
7940 						radeon_crtc_handle_vblank(rdev, 3);
7941 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7942 					DRM_DEBUG_VBLANK("IH: D4 vblank\n");
7943 				}
7944 				break;
7945 			case 1: /* D4 vline */
7946 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7947 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7948 					DRM_DEBUG_VBLANK("IH: D4 vline\n");
7949 				}
7950 				break;
7951 			default:
7952 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7953 				break;
7954 			}
7955 			break;
7956 		case 5: /* D5 vblank/vline */
7957 			switch (src_data) {
7958 			case 0: /* D5 vblank */
7959 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7960 					if (rdev->irq.crtc_vblank_int[4]) {
7961 						drm_handle_vblank(rdev->ddev, 4);
7962 						rdev->pm.vblank_sync = true;
7963 						wake_up(&rdev->irq.vblank_queue);
7964 					}
7965 					if (atomic_read(&rdev->irq.pflip[4]))
7966 						radeon_crtc_handle_vblank(rdev, 4);
7967 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7968 					DRM_DEBUG_VBLANK("IH: D5 vblank\n");
7969 				}
7970 				break;
7971 			case 1: /* D5 vline */
7972 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7973 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7974 					DRM_DEBUG_VBLANK("IH: D5 vline\n");
7975 				}
7976 				break;
7977 			default:
7978 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7979 				break;
7980 			}
7981 			break;
7982 		case 6: /* D6 vblank/vline */
7983 			switch (src_data) {
7984 			case 0: /* D6 vblank */
7985 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7986 					if (rdev->irq.crtc_vblank_int[5]) {
7987 						drm_handle_vblank(rdev->ddev, 5);
7988 						rdev->pm.vblank_sync = true;
7989 						wake_up(&rdev->irq.vblank_queue);
7990 					}
7991 					if (atomic_read(&rdev->irq.pflip[5]))
7992 						radeon_crtc_handle_vblank(rdev, 5);
7993 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7994 					DRM_DEBUG_VBLANK("IH: D6 vblank\n");
7995 				}
7996 				break;
7997 			case 1: /* D6 vline */
7998 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7999 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8000 					DRM_DEBUG_VBLANK("IH: D6 vline\n");
8001 				}
8002 				break;
8003 			default:
8004 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8005 				break;
8006 			}
8007 			break;
8008 		case 8: /* D1 page flip */
8009 		case 10: /* D2 page flip */
8010 		case 12: /* D3 page flip */
8011 		case 14: /* D4 page flip */
8012 		case 16: /* D5 page flip */
8013 		case 18: /* D6 page flip */
8014 			DRM_DEBUG_VBLANK("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8015 			if (radeon_use_pflipirq > 0)
8016 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8017 			break;
8018 		case 42: /* HPD hotplug */
8019 			switch (src_data) {
8020 			case 0:
8021 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
8022 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8023 					queue_hotplug = true;
8024 					DRM_DEBUG("IH: HPD1\n");
8025 				}
8026 				break;
8027 			case 1:
8028 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
8029 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8030 					queue_hotplug = true;
8031 					DRM_DEBUG("IH: HPD2\n");
8032 				}
8033 				break;
8034 			case 2:
8035 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8036 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8037 					queue_hotplug = true;
8038 					DRM_DEBUG("IH: HPD3\n");
8039 				}
8040 				break;
8041 			case 3:
8042 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8043 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8044 					queue_hotplug = true;
8045 					DRM_DEBUG("IH: HPD4\n");
8046 				}
8047 				break;
8048 			case 4:
8049 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8050 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8051 					queue_hotplug = true;
8052 					DRM_DEBUG("IH: HPD5\n");
8053 				}
8054 				break;
8055 			case 5:
8056 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8057 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8058 					queue_hotplug = true;
8059 					DRM_DEBUG("IH: HPD6\n");
8060 				}
8061 				break;
8062 			default:
8063 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8064 				break;
8065 			}
8066 			break;
8067 		case 124: /* UVD */
8068 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8069 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8070 			break;
8071 		case 146:
8072 		case 147:
8073 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8074 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8075 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8076 			/* reset addr and status */
8077 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8078 			if (addr == 0x0 && status == 0x0)
8079 				break;
8080 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8081 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8082 				addr);
8083 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8084 				status);
8085 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8086 			break;
8087 		case 167: /* VCE */
8088 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8089 			switch (src_data) {
8090 			case 0:
8091 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8092 				break;
8093 			case 1:
8094 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8095 				break;
8096 			default:
8097 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8098 				break;
8099 			}
8100 			break;
8101 		case 176: /* GFX RB CP_INT */
8102 		case 177: /* GFX IB CP_INT */
8103 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8104 			break;
8105 		case 181: /* CP EOP event */
8106 			DRM_DEBUG("IH: CP EOP\n");
8107 			/* XXX check the bitfield order! */
8108 			me_id = (ring_id & 0x60) >> 5;
8109 			pipe_id = (ring_id & 0x18) >> 3;
8110 			queue_id = (ring_id & 0x7) >> 0;
8111 			switch (me_id) {
8112 			case 0:
8113 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8114 				break;
8115 			case 1:
8116 			case 2:
8117 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8118 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8119 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8120 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8121 				break;
8122 			}
8123 			break;
8124 		case 184: /* CP Privileged reg access */
8125 			DRM_ERROR("Illegal register access in command stream\n");
8126 			/* XXX check the bitfield order! */
8127 			me_id = (ring_id & 0x60) >> 5;
8128 			pipe_id = (ring_id & 0x18) >> 3;
8129 			queue_id = (ring_id & 0x7) >> 0;
8130 			switch (me_id) {
8131 			case 0:
8132 				/* This results in a full GPU reset, but all we need to do is soft
8133 				 * reset the CP for gfx
8134 				 */
8135 				queue_reset = true;
8136 				break;
8137 			case 1:
8138 				/* XXX compute */
8139 				queue_reset = true;
8140 				break;
8141 			case 2:
8142 				/* XXX compute */
8143 				queue_reset = true;
8144 				break;
8145 			}
8146 			break;
8147 		case 185: /* CP Privileged inst */
8148 			DRM_ERROR("Illegal instruction in command stream\n");
8149 			/* XXX check the bitfield order! */
8150 			me_id = (ring_id & 0x60) >> 5;
8151 			pipe_id = (ring_id & 0x18) >> 3;
8152 			queue_id = (ring_id & 0x7) >> 0;
8153 			switch (me_id) {
8154 			case 0:
8155 				/* This results in a full GPU reset, but all we need to do is soft
8156 				 * reset the CP for gfx
8157 				 */
8158 				queue_reset = true;
8159 				break;
8160 			case 1:
8161 				/* XXX compute */
8162 				queue_reset = true;
8163 				break;
8164 			case 2:
8165 				/* XXX compute */
8166 				queue_reset = true;
8167 				break;
8168 			}
8169 			break;
8170 		case 224: /* SDMA trap event */
8171 			/* XXX check the bitfield order! */
8172 			me_id = (ring_id & 0x3) >> 0;
8173 			queue_id = (ring_id & 0xc) >> 2;
8174 			DRM_DEBUG("IH: SDMA trap\n");
8175 			switch (me_id) {
8176 			case 0:
8177 				switch (queue_id) {
8178 				case 0:
8179 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8180 					break;
8181 				case 1:
8182 					/* XXX compute */
8183 					break;
8184 				case 2:
8185 					/* XXX compute */
8186 					break;
8187 				}
8188 				break;
8189 			case 1:
8190 				switch (queue_id) {
8191 				case 0:
8192 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8193 					break;
8194 				case 1:
8195 					/* XXX compute */
8196 					break;
8197 				case 2:
8198 					/* XXX compute */
8199 					break;
8200 				}
8201 				break;
8202 			}
8203 			break;
8204 		case 230: /* thermal low to high */
8205 			DRM_DEBUG("IH: thermal low to high\n");
8206 			rdev->pm.dpm.thermal.high_to_low = false;
8207 			queue_thermal = true;
8208 			break;
8209 		case 231: /* thermal high to low */
8210 			DRM_DEBUG("IH: thermal high to low\n");
8211 			rdev->pm.dpm.thermal.high_to_low = true;
8212 			queue_thermal = true;
8213 			break;
8214 		case 233: /* GUI IDLE */
8215 			DRM_DEBUG("IH: GUI idle\n");
8216 			break;
8217 		case 241: /* SDMA Privileged inst */
8218 		case 247: /* SDMA Privileged inst */
8219 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8220 			/* XXX check the bitfield order! */
8221 			me_id = (ring_id & 0x3) >> 0;
8222 			queue_id = (ring_id & 0xc) >> 2;
8223 			switch (me_id) {
8224 			case 0:
8225 				switch (queue_id) {
8226 				case 0:
8227 					queue_reset = true;
8228 					break;
8229 				case 1:
8230 					/* XXX compute */
8231 					queue_reset = true;
8232 					break;
8233 				case 2:
8234 					/* XXX compute */
8235 					queue_reset = true;
8236 					break;
8237 				}
8238 				break;
8239 			case 1:
8240 				switch (queue_id) {
8241 				case 0:
8242 					queue_reset = true;
8243 					break;
8244 				case 1:
8245 					/* XXX compute */
8246 					queue_reset = true;
8247 					break;
8248 				case 2:
8249 					/* XXX compute */
8250 					queue_reset = true;
8251 					break;
8252 				}
8253 				break;
8254 			}
8255 			break;
8256 		default:
8257 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8258 			break;
8259 		}
8260 
8261 		/* wptr/rptr are in bytes! */
8262 		rptr += 16;
8263 		rptr &= rdev->ih.ptr_mask;
8264 		WREG32(IH_RB_RPTR, rptr);
8265 	}
8266 	if (queue_hotplug)
8267 		schedule_work(&rdev->hotplug_work);
8268 	if (queue_reset) {
8269 		rdev->needs_reset = true;
8270 		wake_up_all(&rdev->fence_queue);
8271 	}
8272 	if (queue_thermal)
8273 		schedule_work(&rdev->pm.dpm.thermal.work);
8274 	rdev->ih.rptr = rptr;
8275 	atomic_set(&rdev->ih.lock, 0);
8276 
8277 	/* make sure wptr hasn't changed while processing */
8278 	wptr = cik_get_ih_wptr(rdev);
8279 	if (wptr != rptr)
8280 		goto restart_ih;
8281 
8282 	return IRQ_HANDLED;
8283 }
8284 
8285 /*
8286  * startup/shutdown callbacks
8287  */
8288 /**
8289  * cik_startup - program the asic to a functional state
8290  *
8291  * @rdev: radeon_device pointer
8292  *
8293  * Programs the asic to a functional state (CIK).
8294  * Called by cik_init() and cik_resume().
8295  * Returns 0 for success, error for failure.
8296  */
8297 static int cik_startup(struct radeon_device *rdev)
8298 {
8299 	struct radeon_ring *ring;
8300 	u32 nop;
8301 	int r;
8302 
8303 	/* enable pcie gen2/3 link */
8304 	cik_pcie_gen3_enable(rdev);
8305 	/* enable aspm */
8306 	cik_program_aspm(rdev);
8307 
8308 	/* scratch needs to be initialized before MC */
8309 	r = r600_vram_scratch_init(rdev);
8310 	if (r)
8311 		return r;
8312 
8313 	cik_mc_program(rdev);
8314 
8315 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8316 		r = ci_mc_load_microcode(rdev);
8317 		if (r) {
8318 			DRM_ERROR("Failed to load MC firmware!\n");
8319 			return r;
8320 		}
8321 	}
8322 
8323 	r = cik_pcie_gart_enable(rdev);
8324 	if (r)
8325 		return r;
8326 	cik_gpu_init(rdev);
8327 
8328 	/* allocate rlc buffers */
8329 	if (rdev->flags & RADEON_IS_IGP) {
8330 		if (rdev->family == CHIP_KAVERI) {
8331 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8332 			rdev->rlc.reg_list_size =
8333 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8334 		} else {
8335 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8336 			rdev->rlc.reg_list_size =
8337 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8338 		}
8339 	}
8340 	rdev->rlc.cs_data = ci_cs_data;
8341 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8342 	r = sumo_rlc_init(rdev);
8343 	if (r) {
8344 		DRM_ERROR("Failed to init rlc BOs!\n");
8345 		return r;
8346 	}
8347 
8348 	/* allocate wb buffer */
8349 	r = radeon_wb_init(rdev);
8350 	if (r)
8351 		return r;
8352 
8353 	/* allocate mec buffers */
8354 	r = cik_mec_init(rdev);
8355 	if (r) {
8356 		DRM_ERROR("Failed to init MEC BOs!\n");
8357 		return r;
8358 	}
8359 
8360 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8361 	if (r) {
8362 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8363 		return r;
8364 	}
8365 
8366 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8367 	if (r) {
8368 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8369 		return r;
8370 	}
8371 
8372 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8373 	if (r) {
8374 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8375 		return r;
8376 	}
8377 
8378 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8379 	if (r) {
8380 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8381 		return r;
8382 	}
8383 
8384 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8385 	if (r) {
8386 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8387 		return r;
8388 	}
8389 
8390 	r = radeon_uvd_resume(rdev);
8391 	if (!r) {
8392 		r = uvd_v4_2_resume(rdev);
8393 		if (!r) {
8394 			r = radeon_fence_driver_start_ring(rdev,
8395 							   R600_RING_TYPE_UVD_INDEX);
8396 			if (r)
8397 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8398 		}
8399 	}
8400 	if (r)
8401 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8402 
8403 	r = radeon_vce_resume(rdev);
8404 	if (!r) {
8405 		r = vce_v2_0_resume(rdev);
8406 		if (!r)
8407 			r = radeon_fence_driver_start_ring(rdev,
8408 							   TN_RING_TYPE_VCE1_INDEX);
8409 		if (!r)
8410 			r = radeon_fence_driver_start_ring(rdev,
8411 							   TN_RING_TYPE_VCE2_INDEX);
8412 	}
8413 	if (r) {
8414 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8415 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8416 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8417 	}
8418 
8419 	/* Enable IRQ */
8420 	if (!rdev->irq.installed) {
8421 		r = radeon_irq_kms_init(rdev);
8422 		if (r)
8423 			return r;
8424 	}
8425 
8426 	r = cik_irq_init(rdev);
8427 	if (r) {
8428 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8429 		radeon_irq_kms_fini(rdev);
8430 		return r;
8431 	}
8432 	cik_irq_set(rdev);
8433 
8434 	if (rdev->family == CHIP_HAWAII) {
8435 		if (rdev->new_fw)
8436 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8437 		else
8438 			nop = RADEON_CP_PACKET2;
8439 	} else {
8440 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8441 	}
8442 
8443 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8444 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8445 			     nop);
8446 	if (r)
8447 		return r;
8448 
8449 	/* set up the compute queues */
8450 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8451 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8452 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8453 			     nop);
8454 	if (r)
8455 		return r;
8456 	ring->me = 1; /* first MEC */
8457 	ring->pipe = 0; /* first pipe */
8458 	ring->queue = 0; /* first queue */
8459 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8460 
8461 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8462 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8463 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8464 			     nop);
8465 	if (r)
8466 		return r;
8467 	/* dGPU only have 1 MEC */
8468 	ring->me = 1; /* first MEC */
8469 	ring->pipe = 0; /* first pipe */
8470 	ring->queue = 1; /* second queue */
8471 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8472 
8473 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8474 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8475 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8476 	if (r)
8477 		return r;
8478 
8479 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8480 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8481 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8482 	if (r)
8483 		return r;
8484 
8485 	r = cik_cp_resume(rdev);
8486 	if (r)
8487 		return r;
8488 
8489 	r = cik_sdma_resume(rdev);
8490 	if (r)
8491 		return r;
8492 
8493 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8494 	if (ring->ring_size) {
8495 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8496 				     RADEON_CP_PACKET2);
8497 		if (!r)
8498 			r = uvd_v1_0_init(rdev);
8499 		if (r)
8500 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8501 	}
8502 
8503 	r = -ENOENT;
8504 
8505 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8506 	if (ring->ring_size)
8507 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8508 				     VCE_CMD_NO_OP);
8509 
8510 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8511 	if (ring->ring_size)
8512 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8513 				     VCE_CMD_NO_OP);
8514 
8515 	if (!r)
8516 		r = vce_v1_0_init(rdev);
8517 	else if (r != -ENOENT)
8518 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8519 
8520 	r = radeon_ib_pool_init(rdev);
8521 	if (r) {
8522 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8523 		return r;
8524 	}
8525 
8526 	r = radeon_vm_manager_init(rdev);
8527 	if (r) {
8528 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8529 		return r;
8530 	}
8531 
8532 	r = dce6_audio_init(rdev);
8533 	if (r)
8534 		return r;
8535 
8536 	return 0;
8537 }
8538 
8539 /**
8540  * cik_resume - resume the asic to a functional state
8541  *
8542  * @rdev: radeon_device pointer
8543  *
8544  * Programs the asic to a functional state (CIK).
8545  * Called at resume.
8546  * Returns 0 for success, error for failure.
8547  */
8548 int cik_resume(struct radeon_device *rdev)
8549 {
8550 	int r;
8551 
8552 	/* post card */
8553 	atom_asic_init(rdev->mode_info.atom_context);
8554 
8555 	/* init golden registers */
8556 	cik_init_golden_registers(rdev);
8557 
8558 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8559 		radeon_pm_resume(rdev);
8560 
8561 	rdev->accel_working = true;
8562 	r = cik_startup(rdev);
8563 	if (r) {
8564 		DRM_ERROR("cik startup failed on resume\n");
8565 		rdev->accel_working = false;
8566 		return r;
8567 	}
8568 
8569 	return r;
8570 
8571 }
8572 
8573 /**
8574  * cik_suspend - suspend the asic
8575  *
8576  * @rdev: radeon_device pointer
8577  *
8578  * Bring the chip into a state suitable for suspend (CIK).
8579  * Called at suspend.
8580  * Returns 0 for success.
8581  */
8582 int cik_suspend(struct radeon_device *rdev)
8583 {
8584 	radeon_pm_suspend(rdev);
8585 	dce6_audio_fini(rdev);
8586 	radeon_vm_manager_fini(rdev);
8587 	cik_cp_enable(rdev, false);
8588 	cik_sdma_enable(rdev, false);
8589 	uvd_v1_0_fini(rdev);
8590 	radeon_uvd_suspend(rdev);
8591 	radeon_vce_suspend(rdev);
8592 	cik_fini_pg(rdev);
8593 	cik_fini_cg(rdev);
8594 	cik_irq_suspend(rdev);
8595 	radeon_wb_disable(rdev);
8596 	cik_pcie_gart_disable(rdev);
8597 	return 0;
8598 }
8599 
8600 /* Plan is to move initialization in that function and use
8601  * helper function so that radeon_device_init pretty much
8602  * do nothing more than calling asic specific function. This
8603  * should also allow to remove a bunch of callback function
8604  * like vram_info.
8605  */
8606 /**
8607  * cik_init - asic specific driver and hw init
8608  *
8609  * @rdev: radeon_device pointer
8610  *
8611  * Setup asic specific driver variables and program the hw
8612  * to a functional state (CIK).
8613  * Called at driver startup.
8614  * Returns 0 for success, errors for failure.
8615  */
8616 int cik_init(struct radeon_device *rdev)
8617 {
8618 	struct radeon_ring *ring;
8619 	int r;
8620 
8621 	/* Read BIOS */
8622 	if (!radeon_get_bios(rdev)) {
8623 		if (ASIC_IS_AVIVO(rdev))
8624 			return -EINVAL;
8625 	}
8626 	/* Must be an ATOMBIOS */
8627 	if (!rdev->is_atom_bios) {
8628 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8629 		return -EINVAL;
8630 	}
8631 	r = radeon_atombios_init(rdev);
8632 	if (r)
8633 		return r;
8634 
8635 	/* Post card if necessary */
8636 	if (!radeon_card_posted(rdev)) {
8637 		if (!rdev->bios) {
8638 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8639 			return -EINVAL;
8640 		}
8641 		DRM_INFO("GPU not posted. posting now...\n");
8642 		atom_asic_init(rdev->mode_info.atom_context);
8643 	}
8644 	/* init golden registers */
8645 	cik_init_golden_registers(rdev);
8646 	/* Initialize scratch registers */
8647 	cik_scratch_init(rdev);
8648 	/* Initialize surface registers */
8649 	radeon_surface_init(rdev);
8650 	/* Initialize clocks */
8651 	radeon_get_clock_info(rdev->ddev);
8652 
8653 	/* Fence driver */
8654 	r = radeon_fence_driver_init(rdev);
8655 	if (r)
8656 		return r;
8657 
8658 	/* initialize memory controller */
8659 	r = cik_mc_init(rdev);
8660 	if (r)
8661 		return r;
8662 	/* Memory manager */
8663 	r = radeon_bo_init(rdev);
8664 	if (r)
8665 		return r;
8666 
8667 	if (rdev->flags & RADEON_IS_IGP) {
8668 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8669 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8670 			r = cik_init_microcode(rdev);
8671 			if (r) {
8672 				DRM_ERROR("Failed to load firmware!\n");
8673 				return r;
8674 			}
8675 		}
8676 	} else {
8677 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8678 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8679 		    !rdev->mc_fw) {
8680 			r = cik_init_microcode(rdev);
8681 			if (r) {
8682 				DRM_ERROR("Failed to load firmware!\n");
8683 				return r;
8684 			}
8685 		}
8686 	}
8687 
8688 	/* Initialize power management */
8689 	radeon_pm_init(rdev);
8690 
8691 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8692 	ring->ring_obj = NULL;
8693 	r600_ring_init(rdev, ring, 1024 * 1024);
8694 
8695 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8696 	ring->ring_obj = NULL;
8697 	r600_ring_init(rdev, ring, 1024 * 1024);
8698 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8699 	if (r)
8700 		return r;
8701 
8702 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8703 	ring->ring_obj = NULL;
8704 	r600_ring_init(rdev, ring, 1024 * 1024);
8705 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8706 	if (r)
8707 		return r;
8708 
8709 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8710 	ring->ring_obj = NULL;
8711 	r600_ring_init(rdev, ring, 256 * 1024);
8712 
8713 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8714 	ring->ring_obj = NULL;
8715 	r600_ring_init(rdev, ring, 256 * 1024);
8716 
8717 	r = radeon_uvd_init(rdev);
8718 	if (!r) {
8719 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8720 		ring->ring_obj = NULL;
8721 		r600_ring_init(rdev, ring, 4096);
8722 	}
8723 
8724 	r = radeon_vce_init(rdev);
8725 	if (!r) {
8726 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8727 		ring->ring_obj = NULL;
8728 		r600_ring_init(rdev, ring, 4096);
8729 
8730 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8731 		ring->ring_obj = NULL;
8732 		r600_ring_init(rdev, ring, 4096);
8733 	}
8734 
8735 	rdev->ih.ring_obj = NULL;
8736 	r600_ih_ring_init(rdev, 64 * 1024);
8737 
8738 	r = r600_pcie_gart_init(rdev);
8739 	if (r)
8740 		return r;
8741 
8742 	rdev->accel_working = true;
8743 	r = cik_startup(rdev);
8744 	if (r) {
8745 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8746 		cik_cp_fini(rdev);
8747 		cik_sdma_fini(rdev);
8748 		cik_irq_fini(rdev);
8749 		sumo_rlc_fini(rdev);
8750 		cik_mec_fini(rdev);
8751 		radeon_wb_fini(rdev);
8752 		radeon_ib_pool_fini(rdev);
8753 		radeon_vm_manager_fini(rdev);
8754 		radeon_irq_kms_fini(rdev);
8755 		cik_pcie_gart_fini(rdev);
8756 		rdev->accel_working = false;
8757 	}
8758 
8759 	/* Don't start up if the MC ucode is missing.
8760 	 * The default clocks and voltages before the MC ucode
8761 	 * is loaded are not suffient for advanced operations.
8762 	 */
8763 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8764 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8765 		return -EINVAL;
8766 	}
8767 
8768 	return 0;
8769 }
8770 
8771 /**
8772  * cik_fini - asic specific driver and hw fini
8773  *
8774  * @rdev: radeon_device pointer
8775  *
8776  * Tear down the asic specific driver variables and program the hw
8777  * to an idle state (CIK).
8778  * Called at driver unload.
8779  */
8780 void cik_fini(struct radeon_device *rdev)
8781 {
8782 	radeon_pm_fini(rdev);
8783 	cik_cp_fini(rdev);
8784 	cik_sdma_fini(rdev);
8785 	cik_fini_pg(rdev);
8786 	cik_fini_cg(rdev);
8787 	cik_irq_fini(rdev);
8788 	sumo_rlc_fini(rdev);
8789 	cik_mec_fini(rdev);
8790 	radeon_wb_fini(rdev);
8791 	radeon_vm_manager_fini(rdev);
8792 	radeon_ib_pool_fini(rdev);
8793 	radeon_irq_kms_fini(rdev);
8794 	uvd_v1_0_fini(rdev);
8795 	radeon_uvd_fini(rdev);
8796 	radeon_vce_fini(rdev);
8797 	cik_pcie_gart_fini(rdev);
8798 	r600_vram_scratch_fini(rdev);
8799 	radeon_gem_fini(rdev);
8800 	radeon_fence_driver_fini(rdev);
8801 	radeon_bo_fini(rdev);
8802 	radeon_atombios_fini(rdev);
8803 	cik_fini_microcode(rdev);
8804 	kfree(rdev->bios);
8805 	rdev->bios = NULL;
8806 }
8807 
8808 void dce8_program_fmt(struct drm_encoder *encoder)
8809 {
8810 	struct drm_device *dev = encoder->dev;
8811 	struct radeon_device *rdev = dev->dev_private;
8812 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8813 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8814 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8815 	int bpc = 0;
8816 	u32 tmp = 0;
8817 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8818 
8819 	if (connector) {
8820 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8821 		bpc = radeon_get_monitor_bpc(connector);
8822 		dither = radeon_connector->dither;
8823 	}
8824 
8825 	/* LVDS/eDP FMT is set up by atom */
8826 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8827 		return;
8828 
8829 	/* not needed for analog */
8830 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8831 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8832 		return;
8833 
8834 	if (bpc == 0)
8835 		return;
8836 
8837 	switch (bpc) {
8838 	case 6:
8839 		if (dither == RADEON_FMT_DITHER_ENABLE)
8840 			/* XXX sort out optimal dither settings */
8841 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8842 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8843 		else
8844 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8845 		break;
8846 	case 8:
8847 		if (dither == RADEON_FMT_DITHER_ENABLE)
8848 			/* XXX sort out optimal dither settings */
8849 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8850 				FMT_RGB_RANDOM_ENABLE |
8851 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8852 		else
8853 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8854 		break;
8855 	case 10:
8856 		if (dither == RADEON_FMT_DITHER_ENABLE)
8857 			/* XXX sort out optimal dither settings */
8858 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8859 				FMT_RGB_RANDOM_ENABLE |
8860 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8861 		else
8862 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8863 		break;
8864 	default:
8865 		/* not needed */
8866 		break;
8867 	}
8868 
8869 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8870 }
8871 
8872 /* display watermark setup */
8873 /**
8874  * dce8_line_buffer_adjust - Set up the line buffer
8875  *
8876  * @rdev: radeon_device pointer
8877  * @radeon_crtc: the selected display controller
8878  * @mode: the current display mode on the selected display
8879  * controller
8880  *
8881  * Setup up the line buffer allocation for
8882  * the selected display controller (CIK).
8883  * Returns the line buffer size in pixels.
8884  */
8885 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8886 				   struct radeon_crtc *radeon_crtc,
8887 				   struct drm_display_mode *mode)
8888 {
8889 	u32 tmp, buffer_alloc, i;
8890 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8891 	/*
8892 	 * Line Buffer Setup
8893 	 * There are 6 line buffers, one for each display controllers.
8894 	 * There are 3 partitions per LB. Select the number of partitions
8895 	 * to enable based on the display width.  For display widths larger
8896 	 * than 4096, you need use to use 2 display controllers and combine
8897 	 * them using the stereo blender.
8898 	 */
8899 	if (radeon_crtc->base.enabled && mode) {
8900 		if (mode->crtc_hdisplay < 1920) {
8901 			tmp = 1;
8902 			buffer_alloc = 2;
8903 		} else if (mode->crtc_hdisplay < 2560) {
8904 			tmp = 2;
8905 			buffer_alloc = 2;
8906 		} else if (mode->crtc_hdisplay < 4096) {
8907 			tmp = 0;
8908 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8909 		} else {
8910 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8911 			tmp = 0;
8912 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8913 		}
8914 	} else {
8915 		tmp = 1;
8916 		buffer_alloc = 0;
8917 	}
8918 
8919 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8920 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8921 
8922 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8923 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8924 	for (i = 0; i < rdev->usec_timeout; i++) {
8925 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8926 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8927 			break;
8928 		udelay(1);
8929 	}
8930 
8931 	if (radeon_crtc->base.enabled && mode) {
8932 		switch (tmp) {
8933 		case 0:
8934 		default:
8935 			return 4096 * 2;
8936 		case 1:
8937 			return 1920 * 2;
8938 		case 2:
8939 			return 2560 * 2;
8940 		}
8941 	}
8942 
8943 	/* controller not enabled, so no lb used */
8944 	return 0;
8945 }
8946 
8947 /**
8948  * cik_get_number_of_dram_channels - get the number of dram channels
8949  *
8950  * @rdev: radeon_device pointer
8951  *
8952  * Look up the number of video ram channels (CIK).
8953  * Used for display watermark bandwidth calculations
8954  * Returns the number of dram channels
8955  */
8956 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8957 {
8958 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8959 
8960 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8961 	case 0:
8962 	default:
8963 		return 1;
8964 	case 1:
8965 		return 2;
8966 	case 2:
8967 		return 4;
8968 	case 3:
8969 		return 8;
8970 	case 4:
8971 		return 3;
8972 	case 5:
8973 		return 6;
8974 	case 6:
8975 		return 10;
8976 	case 7:
8977 		return 12;
8978 	case 8:
8979 		return 16;
8980 	}
8981 }
8982 
/* Inputs for the DCE8 display watermark calculations below. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8998 
8999 /**
9000  * dce8_dram_bandwidth - get the dram bandwidth
9001  *
9002  * @wm: watermark calculation data
9003  *
9004  * Calculate the raw dram bandwidth (CIK).
9005  * Used for display watermark bandwidth calculations
9006  * Returns the dram bandwidth in MBytes/s
9007  */
9008 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9009 {
9010 	/* Calculate raw DRAM Bandwidth */
9011 	fixed20_12 dram_efficiency; /* 0.7 */
9012 	fixed20_12 yclk, dram_channels, bandwidth;
9013 	fixed20_12 a;
9014 
9015 	a.full = dfixed_const(1000);
9016 	yclk.full = dfixed_const(wm->yclk);
9017 	yclk.full = dfixed_div(yclk, a);
9018 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9019 	a.full = dfixed_const(10);
9020 	dram_efficiency.full = dfixed_const(7);
9021 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9022 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9023 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9024 
9025 	return dfixed_trunc(bandwidth);
9026 }
9027 
9028 /**
9029  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9030  *
9031  * @wm: watermark calculation data
9032  *
9033  * Calculate the dram bandwidth used for display (CIK).
9034  * Used for display watermark bandwidth calculations
9035  * Returns the dram bandwidth for display in MBytes/s
9036  */
9037 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9038 {
9039 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9040 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9041 	fixed20_12 yclk, dram_channels, bandwidth;
9042 	fixed20_12 a;
9043 
9044 	a.full = dfixed_const(1000);
9045 	yclk.full = dfixed_const(wm->yclk);
9046 	yclk.full = dfixed_div(yclk, a);
9047 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9048 	a.full = dfixed_const(10);
9049 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9050 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9051 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9052 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9053 
9054 	return dfixed_trunc(bandwidth);
9055 }
9056 
9057 /**
9058  * dce8_data_return_bandwidth - get the data return bandwidth
9059  *
9060  * @wm: watermark calculation data
9061  *
9062  * Calculate the data return bandwidth used for display (CIK).
9063  * Used for display watermark bandwidth calculations
9064  * Returns the data return bandwidth in MBytes/s
9065  */
9066 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9067 {
9068 	/* Calculate the display Data return Bandwidth */
9069 	fixed20_12 return_efficiency; /* 0.8 */
9070 	fixed20_12 sclk, bandwidth;
9071 	fixed20_12 a;
9072 
9073 	a.full = dfixed_const(1000);
9074 	sclk.full = dfixed_const(wm->sclk);
9075 	sclk.full = dfixed_div(sclk, a);
9076 	a.full = dfixed_const(10);
9077 	return_efficiency.full = dfixed_const(8);
9078 	return_efficiency.full = dfixed_div(return_efficiency, a);
9079 	a.full = dfixed_const(32);
9080 	bandwidth.full = dfixed_mul(a, sclk);
9081 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9082 
9083 	return dfixed_trunc(bandwidth);
9084 }
9085 
9086 /**
9087  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9088  *
9089  * @wm: watermark calculation data
9090  *
9091  * Calculate the dmif bandwidth used for display (CIK).
9092  * Used for display watermark bandwidth calculations
9093  * Returns the dmif bandwidth in MBytes/s
9094  */
9095 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9096 {
9097 	/* Calculate the DMIF Request Bandwidth */
9098 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9099 	fixed20_12 disp_clk, bandwidth;
9100 	fixed20_12 a, b;
9101 
9102 	a.full = dfixed_const(1000);
9103 	disp_clk.full = dfixed_const(wm->disp_clk);
9104 	disp_clk.full = dfixed_div(disp_clk, a);
9105 	a.full = dfixed_const(32);
9106 	b.full = dfixed_mul(a, disp_clk);
9107 
9108 	a.full = dfixed_const(10);
9109 	disp_clk_request_efficiency.full = dfixed_const(8);
9110 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9111 
9112 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9113 
9114 	return dfixed_trunc(bandwidth);
9115 }
9116 
9117 /**
9118  * dce8_available_bandwidth - get the min available bandwidth
9119  *
9120  * @wm: watermark calculation data
9121  *
9122  * Calculate the min available bandwidth used for display (CIK).
9123  * Used for display watermark bandwidth calculations
9124  * Returns the min available bandwidth in MBytes/s
9125  */
9126 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9127 {
9128 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9129 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9130 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9131 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9132 
9133 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9134 }
9135 
9136 /**
9137  * dce8_average_bandwidth - get the average available bandwidth
9138  *
9139  * @wm: watermark calculation data
9140  *
9141  * Calculate the average available bandwidth used for display (CIK).
9142  * Used for display watermark bandwidth calculations
9143  * Returns the average available bandwidth in MBytes/s
9144  */
9145 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9146 {
9147 	/* Calculate the display mode Average Bandwidth
9148 	 * DisplayMode should contain the source and destination dimensions,
9149 	 * timing, etc.
9150 	 */
9151 	fixed20_12 bpp;
9152 	fixed20_12 line_time;
9153 	fixed20_12 src_width;
9154 	fixed20_12 bandwidth;
9155 	fixed20_12 a;
9156 
9157 	a.full = dfixed_const(1000);
9158 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9159 	line_time.full = dfixed_div(line_time, a);
9160 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9161 	src_width.full = dfixed_const(wm->src_width);
9162 	bandwidth.full = dfixed_mul(src_width, bpp);
9163 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9164 	bandwidth.full = dfixed_div(bandwidth, line_time);
9165 
9166 	return dfixed_trunc(bandwidth);
9167 }
9168 
9169 /**
9170  * dce8_latency_watermark - get the latency watermark
9171  *
9172  * @wm: watermark calculation data
9173  *
9174  * Calculate the latency watermark (CIK).
9175  * Used for display watermark bandwidth calculations
9176  * Returns the latency watermark in ns
9177  */
9178 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9179 {
9180 	/* First calculate the latency in ns */
9181 	u32 mc_latency = 2000; /* 2000 ns. */
9182 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9183 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9184 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9185 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9186 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9187 		(wm->num_heads * cursor_line_pair_return_time);
9188 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9189 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9190 	u32 tmp, dmif_size = 12288;
9191 	fixed20_12 a, b, c;
9192 
9193 	if (wm->num_heads == 0)
9194 		return 0;
9195 
9196 	a.full = dfixed_const(2);
9197 	b.full = dfixed_const(1);
9198 	if ((wm->vsc.full > a.full) ||
9199 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9200 	    (wm->vtaps >= 5) ||
9201 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9202 		max_src_lines_per_dst_line = 4;
9203 	else
9204 		max_src_lines_per_dst_line = 2;
9205 
9206 	a.full = dfixed_const(available_bandwidth);
9207 	b.full = dfixed_const(wm->num_heads);
9208 	a.full = dfixed_div(a, b);
9209 
9210 	b.full = dfixed_const(mc_latency + 512);
9211 	c.full = dfixed_const(wm->disp_clk);
9212 	b.full = dfixed_div(b, c);
9213 
9214 	c.full = dfixed_const(dmif_size);
9215 	b.full = dfixed_div(c, b);
9216 
9217 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9218 
9219 	b.full = dfixed_const(1000);
9220 	c.full = dfixed_const(wm->disp_clk);
9221 	b.full = dfixed_div(c, b);
9222 	c.full = dfixed_const(wm->bytes_per_pixel);
9223 	b.full = dfixed_mul(b, c);
9224 
9225 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9226 
9227 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9228 	b.full = dfixed_const(1000);
9229 	c.full = dfixed_const(lb_fill_bw);
9230 	b.full = dfixed_div(c, b);
9231 	a.full = dfixed_div(a, b);
9232 	line_fill_time = dfixed_trunc(a);
9233 
9234 	if (line_fill_time < wm->active_time)
9235 		return latency;
9236 	else
9237 		return latency + (line_fill_time - wm->active_time);
9238 
9239 }
9240 
9241 /**
9242  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9243  * average and available dram bandwidth
9244  *
9245  * @wm: watermark calculation data
9246  *
9247  * Check if the display average bandwidth fits in the display
9248  * dram bandwidth (CIK).
9249  * Used for display watermark bandwidth calculations
9250  * Returns true if the display fits, false if not.
9251  */
9252 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9253 {
9254 	if (dce8_average_bandwidth(wm) <=
9255 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9256 		return true;
9257 	else
9258 		return false;
9259 }
9260 
9261 /**
9262  * dce8_average_bandwidth_vs_available_bandwidth - check
9263  * average and available bandwidth
9264  *
9265  * @wm: watermark calculation data
9266  *
9267  * Check if the display average bandwidth fits in the display
9268  * available bandwidth (CIK).
9269  * Used for display watermark bandwidth calculations
9270  * Returns true if the display fits, false if not.
9271  */
9272 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9273 {
9274 	if (dce8_average_bandwidth(wm) <=
9275 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9276 		return true;
9277 	else
9278 		return false;
9279 }
9280 
9281 /**
9282  * dce8_check_latency_hiding - check latency hiding
9283  *
9284  * @wm: watermark calculation data
9285  *
9286  * Check latency hiding (CIK).
9287  * Used for display watermark bandwidth calculations
9288  * Returns true if the display fits, false if not.
9289  */
9290 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9291 {
9292 	u32 lb_partitions = wm->lb_size / wm->src_width;
9293 	u32 line_time = wm->active_time + wm->blank_time;
9294 	u32 latency_tolerant_lines;
9295 	u32 latency_hiding;
9296 	fixed20_12 a;
9297 
9298 	a.full = dfixed_const(1);
9299 	if (wm->vsc.full > a.full)
9300 		latency_tolerant_lines = 1;
9301 	else {
9302 		if (lb_partitions <= (wm->vtaps + 1))
9303 			latency_tolerant_lines = 1;
9304 		else
9305 			latency_tolerant_lines = 2;
9306 	}
9307 
9308 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9309 
9310 	if (dce8_latency_watermark(wm) <= latency_hiding)
9311 		return true;
9312 	else
9313 		return false;
9314 }
9315 
9316 /**
9317  * dce8_program_watermarks - program display watermarks
9318  *
9319  * @rdev: radeon_device pointer
9320  * @radeon_crtc: the selected display controller
9321  * @lb_size: line buffer size
9322  * @num_heads: number of display controllers in use
9323  *
9324  * Calculate and program the display watermarks for the
9325  * selected display controller (CIK).
9326  */
9327 static void dce8_program_watermarks(struct radeon_device *rdev,
9328 				    struct radeon_crtc *radeon_crtc,
9329 				    u32 lb_size, u32 num_heads)
9330 {
9331 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9332 	struct dce8_wm_params wm_low, wm_high;
9333 	u32 pixel_period;
9334 	u32 line_time = 0;
9335 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9336 	u32 tmp, wm_mask;
9337 
9338 	if (radeon_crtc->base.enabled && num_heads && mode) {
9339 		pixel_period = 1000000 / (u32)mode->clock;
9340 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9341 
9342 		/* watermark for high clocks */
9343 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9344 		    rdev->pm.dpm_enabled) {
9345 			wm_high.yclk =
9346 				radeon_dpm_get_mclk(rdev, false) * 10;
9347 			wm_high.sclk =
9348 				radeon_dpm_get_sclk(rdev, false) * 10;
9349 		} else {
9350 			wm_high.yclk = rdev->pm.current_mclk * 10;
9351 			wm_high.sclk = rdev->pm.current_sclk * 10;
9352 		}
9353 
9354 		wm_high.disp_clk = mode->clock;
9355 		wm_high.src_width = mode->crtc_hdisplay;
9356 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9357 		wm_high.blank_time = line_time - wm_high.active_time;
9358 		wm_high.interlaced = false;
9359 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9360 			wm_high.interlaced = true;
9361 		wm_high.vsc = radeon_crtc->vsc;
9362 		wm_high.vtaps = 1;
9363 		if (radeon_crtc->rmx_type != RMX_OFF)
9364 			wm_high.vtaps = 2;
9365 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9366 		wm_high.lb_size = lb_size;
9367 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9368 		wm_high.num_heads = num_heads;
9369 
9370 		/* set for high clocks */
9371 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9372 
9373 		/* possibly force display priority to high */
9374 		/* should really do this at mode validation time... */
9375 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9376 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9377 		    !dce8_check_latency_hiding(&wm_high) ||
9378 		    (rdev->disp_priority == 2)) {
9379 			DRM_DEBUG_KMS("force priority to high\n");
9380 		}
9381 
9382 		/* watermark for low clocks */
9383 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9384 		    rdev->pm.dpm_enabled) {
9385 			wm_low.yclk =
9386 				radeon_dpm_get_mclk(rdev, true) * 10;
9387 			wm_low.sclk =
9388 				radeon_dpm_get_sclk(rdev, true) * 10;
9389 		} else {
9390 			wm_low.yclk = rdev->pm.current_mclk * 10;
9391 			wm_low.sclk = rdev->pm.current_sclk * 10;
9392 		}
9393 
9394 		wm_low.disp_clk = mode->clock;
9395 		wm_low.src_width = mode->crtc_hdisplay;
9396 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9397 		wm_low.blank_time = line_time - wm_low.active_time;
9398 		wm_low.interlaced = false;
9399 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9400 			wm_low.interlaced = true;
9401 		wm_low.vsc = radeon_crtc->vsc;
9402 		wm_low.vtaps = 1;
9403 		if (radeon_crtc->rmx_type != RMX_OFF)
9404 			wm_low.vtaps = 2;
9405 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9406 		wm_low.lb_size = lb_size;
9407 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9408 		wm_low.num_heads = num_heads;
9409 
9410 		/* set for low clocks */
9411 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9412 
9413 		/* possibly force display priority to high */
9414 		/* should really do this at mode validation time... */
9415 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9416 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9417 		    !dce8_check_latency_hiding(&wm_low) ||
9418 		    (rdev->disp_priority == 2)) {
9419 			DRM_DEBUG_KMS("force priority to high\n");
9420 		}
9421 	}
9422 
9423 	/* select wm A */
9424 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9425 	tmp = wm_mask;
9426 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9427 	tmp |= LATENCY_WATERMARK_MASK(1);
9428 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9429 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9430 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9431 		LATENCY_HIGH_WATERMARK(line_time)));
9432 	/* select wm B */
9433 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9434 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9435 	tmp |= LATENCY_WATERMARK_MASK(2);
9436 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9437 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9438 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9439 		LATENCY_HIGH_WATERMARK(line_time)));
9440 	/* restore original selection */
9441 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9442 
9443 	/* save values for DPM */
9444 	radeon_crtc->line_time = line_time;
9445 	radeon_crtc->wm_high = latency_watermark_a;
9446 	radeon_crtc->wm_low = latency_watermark_b;
9447 }
9448 
9449 /**
9450  * dce8_bandwidth_update - program display watermarks
9451  *
9452  * @rdev: radeon_device pointer
9453  *
9454  * Calculate and program the display watermarks and line
9455  * buffer allocation (CIK).
9456  */
9457 void dce8_bandwidth_update(struct radeon_device *rdev)
9458 {
9459 	struct drm_display_mode *mode = NULL;
9460 	u32 num_heads = 0, lb_size;
9461 	int i;
9462 
9463 	if (!rdev->mode_info.mode_config_initialized)
9464 		return;
9465 
9466 	radeon_update_display_priority(rdev);
9467 
9468 	for (i = 0; i < rdev->num_crtc; i++) {
9469 		if (rdev->mode_info.crtcs[i]->base.enabled)
9470 			num_heads++;
9471 	}
9472 	for (i = 0; i < rdev->num_crtc; i++) {
9473 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9474 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9475 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9476 	}
9477 }
9478 
9479 /**
9480  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9481  *
9482  * @rdev: radeon_device pointer
9483  *
9484  * Fetches a GPU clock counter snapshot (SI).
9485  * Returns the 64 bit clock counter snapshot.
9486  */
9487 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9488 {
9489 	uint64_t clock;
9490 
9491 	mutex_lock(&rdev->gpu_clock_mutex);
9492 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9493 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9494 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9495 	mutex_unlock(&rdev->gpu_clock_mutex);
9496 	return clock;
9497 }
9498 
/**
 * cik_set_uvd_clock - program one UVD clock via its SMC registers
 *
 * @rdev: radeon_device pointer
 * @clock: target clock frequency
 * @cntl_reg: SMC clock control register holding the divider field
 * @status_reg: SMC clock status register to poll
 *
 * Looks up the dividers for @clock via atom, programs the post
 * divider into @cntl_reg and polls @status_reg until the clock
 * reports stable.
 * Returns 0 on success, the atom lookup error, or -ETIMEDOUT if
 * the clock does not settle within ~1s.
 */
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
                              u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	/* wait up to 100 * 10ms for the clock to report stable */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
9526 
9527 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9528 {
9529 	int r = 0;
9530 
9531 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9532 	if (r)
9533 		return r;
9534 
9535 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9536 	return r;
9537 }
9538 
9539 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9540 {
9541 	int r, i;
9542 	struct atom_clock_dividers dividers;
9543 	u32 tmp;
9544 
9545 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9546 					   ecclk, false, &dividers);
9547 	if (r)
9548 		return r;
9549 
9550 	for (i = 0; i < 100; i++) {
9551 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9552 			break;
9553 		mdelay(10);
9554 	}
9555 	if (i == 100)
9556 		return -ETIMEDOUT;
9557 
9558 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9559 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9560 	tmp |= dividers.post_divider;
9561 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9562 
9563 	for (i = 0; i < 100; i++) {
9564 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9565 			break;
9566 		mdelay(10);
9567 	}
9568 	if (i == 100)
9569 		return -ETIMEDOUT;
9570 
9571 	return 0;
9572 }
9573 
/**
 * cik_pcie_gen3_enable - retrain the PCIe link at gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * If both the root port and the GPU support 5.0 GT/s (gen2) or 8.0 GT/s
 * (gen3), re-run link equalization as needed, set the target link speed
 * and initiate a speed change.  Returns silently on IGPs, non-PCIE parts,
 * when only gen1 is available, or when disabled via radeon.pcie_gen2=0.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* honor the radeon.pcie_gen2=0 module parameter */
	if (radeon_pcie_gen2 == 0)
		return;

	/* integrated GPUs have no external PCIe link to train */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* query the speeds supported by the platform */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen1 (2.5 GT/s) is available */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current data rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* locate the PCIe capability in config space (DragonFly helper) */
	bridge_pos = pci_get_pciecap_ptr(root->dev.bsddev);
	if (!bridge_pos)
		return;

	gpu_pos = pci_get_pciecap_ptr(rdev->pdev->dev.bsddev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the current HAWD (hw autonomous width disable)
			 * settings on both ends of the link */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* if the link trained narrower than it could be,
			 * renegotiate it back to full width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* redo equalization up to 10 times, quiescing the link
			 * around each attempt */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				/* restore the saved HAWD bit on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore the saved compliance/margin bits.
				 * NOTE(review): mask (1 << 4) | (7 << 9) — bit 4 is
				 * Enter Compliance; the PCIe spec places Transmit
				 * Margin at bits 9:7, so verify the 7 << 9 field
				 * against the intended LNKCTL2 fields. */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed (low nibble of LNKCTL2) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9730 
/**
 * cik_program_aspm - configure PCIe Active State Power Management
 *
 * @rdev: radeon_device pointer
 *
 * Programs the L0s/L1 link power states, PLL power-down-in-L1 behavior
 * and related clock selects.  Returns early on IGPs, non-PCIE parts, or
 * when disabled via the radeon.aspm module option.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* policy knobs; all ASPM features are currently left enabled */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* honor the radeon.aspm=0 module parameter */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* advertise 0x24 fast training sequences for L0s exit */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	/* ignore end-of-data-block errors */
	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* build the LC_CNTL value: clear both inactivity timers, then
	 * re-enable the ones the policy flags allow */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PLLs on both PIF blocks to power down
			 * while the link is in L1 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				/* CLKREQ detection via the root port's LNKCAP is
				 * stubbed out on DragonFly (zMN_TODO), so
				 * clk_req_support is always false here and the
				 * block below is currently dead code. */
#ifdef zMN_TODO
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
#else
				clk_req_support = false;
#endif
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch the thermal monitor and misc clocks off
				 * the reference clock so it can be gated */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: just commit the L0s/PMI settings built above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable light sleep for the BIF memories */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* back off: if the N_FTS field is saturated and the link is
		 * reversed in both directions, clear the L0s inactivity
		 * timer again (presumably L0s is unreliable in that
		 * configuration — per AMD programming sequence; verify). */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
9882