xref: /dragonfly/sys/dev/drm/radeon/cik.c (revision ae24b5e0)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include "cikd.h"
30 #include "atom.h"
31 #include "cik_blit_shaders.h"
32 #include "radeon_ucode.h"
33 #include "clearstate_ci.h"
34 
/*
 * Firmware images declared for this driver via MODULE_FIRMWARE().
 *
 * Each chip family listed here (BONAIRE, HAWAII, KAVERI, KABINI, MULLINS)
 * appears twice: once with upper-case file names and once with lower-case
 * file names.  The two sets are not identical: the upper-case BONAIRE and
 * HAWAII sets include an extra "mc2" image, and the lower-case kaveri set
 * includes an extra "mec2" image.
 *
 * NOTE(review): which spelling is requested at run time is decided by code
 * outside this view -- confirm against the microcode loading routine.
 */
35 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
36 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 
45 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
46 MODULE_FIRMWARE("radeon/bonaire_me.bin");
47 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
48 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
49 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
50 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
51 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
52 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
53 
54 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
55 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
56 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
57 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
63 
64 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
65 MODULE_FIRMWARE("radeon/hawaii_me.bin");
66 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
67 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
68 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
69 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
70 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
71 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
72 
73 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
74 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
75 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
76 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
79 
80 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
81 MODULE_FIRMWARE("radeon/kaveri_me.bin");
82 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
83 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
84 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
85 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
86 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
87 
88 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
89 MODULE_FIRMWARE("radeon/KABINI_me.bin");
90 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
91 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
92 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
93 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
94 
95 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
96 MODULE_FIRMWARE("radeon/kabini_me.bin");
97 MODULE_FIRMWARE("radeon/kabini_ce.bin");
98 MODULE_FIRMWARE("radeon/kabini_mec.bin");
99 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
100 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
101 
102 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
103 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
104 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
105 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
108 
109 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
110 MODULE_FIRMWARE("radeon/mullins_me.bin");
111 MODULE_FIRMWARE("radeon/mullins_ce.bin");
112 MODULE_FIRMWARE("radeon/mullins_mec.bin");
113 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
114 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
115 
/*
 * Forward declarations of file-local (static) helpers.  Their definitions
 * are not part of this section of the file; the prototypes let earlier code
 * call them before they are defined.
 */
116 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
117 static void cik_rlc_stop(struct radeon_device *rdev);
118 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
119 static void cik_program_aspm(struct radeon_device *rdev);
120 static void cik_init_pg(struct radeon_device *rdev);
121 static void cik_init_cg(struct radeon_device *rdev);
122 static void cik_fini_pg(struct radeon_device *rdev);
123 static void cik_fini_cg(struct radeon_device *rdev);
124 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
125 					  bool enable);
126 
127 /* get temperature in millidegrees */
128 int ci_get_temp(struct radeon_device *rdev)
129 {
130 	u32 temp;
131 	int actual_temp = 0;
132 
133 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
134 		CTF_TEMP_SHIFT;
135 
136 	if (temp & 0x200)
137 		actual_temp = 255;
138 	else
139 		actual_temp = temp & 0x1ff;
140 
141 	actual_temp = actual_temp * 1000;
142 
143 	return actual_temp;
144 }
145 
146 /* get temperature in millidegrees */
147 int kv_get_temp(struct radeon_device *rdev)
148 {
149 	u32 temp;
150 	int actual_temp = 0;
151 
152 	temp = RREG32_SMC(0xC0300E0C);
153 
154 	if (temp)
155 		actual_temp = (temp / 8) - 49;
156 	else
157 		actual_temp = 0;
158 
159 	actual_temp = actual_temp * 1000;
160 
161 	return actual_temp;
162 }
163 
164 /*
165  * Indirect registers accessor
166  */
167 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
168 {
169 	u32 r;
170 
171 	spin_lock(&rdev->pciep_idx_lock);
172 	WREG32(PCIE_INDEX, reg);
173 	(void)RREG32(PCIE_INDEX);
174 	r = RREG32(PCIE_DATA);
175 	spin_unlock(&rdev->pciep_idx_lock);
176 	return r;
177 }
178 
179 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
180 {
181 	spin_lock(&rdev->pciep_idx_lock);
182 	WREG32(PCIE_INDEX, reg);
183 	(void)RREG32(PCIE_INDEX);
184 	WREG32(PCIE_DATA, v);
185 	(void)RREG32(PCIE_DATA);
186 	spin_unlock(&rdev->pciep_idx_lock);
187 }
188 
/*
 * RLC save/restore register list for "spectre" (per the array name).
 *
 * Layout as written here:
 *  - Most entries are two words: a packed word
 *    (hi16 << 16) | (register offset >> 2), followed by a 0x00000000 word.
 *  - Two bare words (0x3 and 0x5) appear without a paired zero word.
 *  - The five entries after 0x5 are single packed words with no zero word.
 *
 * NOTE(review): the meaning of the hi16 field (0x0e00, 0x0600, 0x0400,
 * 0x4e00..0xbe00, 0x0000, 0x0001 are used), of the zero words and of the
 * 0x3 / 0x5 words is defined by the consumer of this table, which is not
 * visible here -- confirm before editing.  The table is positional; do not
 * reorder entries.
 */
189 static const u32 spectre_rlc_save_restore_register_list[] =
190 {
191 	(0x0e00 << 16) | (0xc12c >> 2),
192 	0x00000000,
193 	(0x0e00 << 16) | (0xc140 >> 2),
194 	0x00000000,
195 	(0x0e00 << 16) | (0xc150 >> 2),
196 	0x00000000,
197 	(0x0e00 << 16) | (0xc15c >> 2),
198 	0x00000000,
199 	(0x0e00 << 16) | (0xc168 >> 2),
200 	0x00000000,
201 	(0x0e00 << 16) | (0xc170 >> 2),
202 	0x00000000,
203 	(0x0e00 << 16) | (0xc178 >> 2),
204 	0x00000000,
205 	(0x0e00 << 16) | (0xc204 >> 2),
206 	0x00000000,
207 	(0x0e00 << 16) | (0xc2b4 >> 2),
208 	0x00000000,
209 	(0x0e00 << 16) | (0xc2b8 >> 2),
210 	0x00000000,
211 	(0x0e00 << 16) | (0xc2bc >> 2),
212 	0x00000000,
213 	(0x0e00 << 16) | (0xc2c0 >> 2),
214 	0x00000000,
215 	(0x0e00 << 16) | (0x8228 >> 2),
216 	0x00000000,
217 	(0x0e00 << 16) | (0x829c >> 2),
218 	0x00000000,
219 	(0x0e00 << 16) | (0x869c >> 2),
220 	0x00000000,
221 	(0x0600 << 16) | (0x98f4 >> 2),
222 	0x00000000,
223 	(0x0e00 << 16) | (0x98f8 >> 2),
224 	0x00000000,
225 	(0x0e00 << 16) | (0x9900 >> 2),
226 	0x00000000,
227 	(0x0e00 << 16) | (0xc260 >> 2),
228 	0x00000000,
229 	(0x0e00 << 16) | (0x90e8 >> 2),
230 	0x00000000,
231 	(0x0e00 << 16) | (0x3c000 >> 2),
232 	0x00000000,
233 	(0x0e00 << 16) | (0x3c00c >> 2),
234 	0x00000000,
235 	(0x0e00 << 16) | (0x8c1c >> 2),
236 	0x00000000,
237 	(0x0e00 << 16) | (0x9700 >> 2),
238 	0x00000000,
239 	(0x0e00 << 16) | (0xcd20 >> 2),
240 	0x00000000,
241 	(0x4e00 << 16) | (0xcd20 >> 2),
242 	0x00000000,
243 	(0x5e00 << 16) | (0xcd20 >> 2),
244 	0x00000000,
245 	(0x6e00 << 16) | (0xcd20 >> 2),
246 	0x00000000,
247 	(0x7e00 << 16) | (0xcd20 >> 2),
248 	0x00000000,
249 	(0x8e00 << 16) | (0xcd20 >> 2),
250 	0x00000000,
251 	(0x9e00 << 16) | (0xcd20 >> 2),
252 	0x00000000,
253 	(0xae00 << 16) | (0xcd20 >> 2),
254 	0x00000000,
255 	(0xbe00 << 16) | (0xcd20 >> 2),
256 	0x00000000,
257 	(0x0e00 << 16) | (0x89bc >> 2),
258 	0x00000000,
259 	(0x0e00 << 16) | (0x8900 >> 2),
260 	0x00000000,
	/* Bare word, no paired zero word. NOTE(review): meaning set by the consumer. */
261 	0x3,
262 	(0x0e00 << 16) | (0xc130 >> 2),
263 	0x00000000,
264 	(0x0e00 << 16) | (0xc134 >> 2),
265 	0x00000000,
266 	(0x0e00 << 16) | (0xc1fc >> 2),
267 	0x00000000,
268 	(0x0e00 << 16) | (0xc208 >> 2),
269 	0x00000000,
270 	(0x0e00 << 16) | (0xc264 >> 2),
271 	0x00000000,
272 	(0x0e00 << 16) | (0xc268 >> 2),
273 	0x00000000,
274 	(0x0e00 << 16) | (0xc26c >> 2),
275 	0x00000000,
276 	(0x0e00 << 16) | (0xc270 >> 2),
277 	0x00000000,
278 	(0x0e00 << 16) | (0xc274 >> 2),
279 	0x00000000,
280 	(0x0e00 << 16) | (0xc278 >> 2),
281 	0x00000000,
282 	(0x0e00 << 16) | (0xc27c >> 2),
283 	0x00000000,
284 	(0x0e00 << 16) | (0xc280 >> 2),
285 	0x00000000,
286 	(0x0e00 << 16) | (0xc284 >> 2),
287 	0x00000000,
288 	(0x0e00 << 16) | (0xc288 >> 2),
289 	0x00000000,
290 	(0x0e00 << 16) | (0xc28c >> 2),
291 	0x00000000,
292 	(0x0e00 << 16) | (0xc290 >> 2),
293 	0x00000000,
294 	(0x0e00 << 16) | (0xc294 >> 2),
295 	0x00000000,
296 	(0x0e00 << 16) | (0xc298 >> 2),
297 	0x00000000,
298 	(0x0e00 << 16) | (0xc29c >> 2),
299 	0x00000000,
300 	(0x0e00 << 16) | (0xc2a0 >> 2),
301 	0x00000000,
302 	(0x0e00 << 16) | (0xc2a4 >> 2),
303 	0x00000000,
304 	(0x0e00 << 16) | (0xc2a8 >> 2),
305 	0x00000000,
306 	(0x0e00 << 16) | (0xc2ac  >> 2),
307 	0x00000000,
308 	(0x0e00 << 16) | (0xc2b0 >> 2),
309 	0x00000000,
310 	(0x0e00 << 16) | (0x301d0 >> 2),
311 	0x00000000,
312 	(0x0e00 << 16) | (0x30238 >> 2),
313 	0x00000000,
314 	(0x0e00 << 16) | (0x30250 >> 2),
315 	0x00000000,
316 	(0x0e00 << 16) | (0x30254 >> 2),
317 	0x00000000,
318 	(0x0e00 << 16) | (0x30258 >> 2),
319 	0x00000000,
320 	(0x0e00 << 16) | (0x3025c >> 2),
321 	0x00000000,
322 	(0x4e00 << 16) | (0xc900 >> 2),
323 	0x00000000,
324 	(0x5e00 << 16) | (0xc900 >> 2),
325 	0x00000000,
326 	(0x6e00 << 16) | (0xc900 >> 2),
327 	0x00000000,
328 	(0x7e00 << 16) | (0xc900 >> 2),
329 	0x00000000,
330 	(0x8e00 << 16) | (0xc900 >> 2),
331 	0x00000000,
332 	(0x9e00 << 16) | (0xc900 >> 2),
333 	0x00000000,
334 	(0xae00 << 16) | (0xc900 >> 2),
335 	0x00000000,
336 	(0xbe00 << 16) | (0xc900 >> 2),
337 	0x00000000,
338 	(0x4e00 << 16) | (0xc904 >> 2),
339 	0x00000000,
340 	(0x5e00 << 16) | (0xc904 >> 2),
341 	0x00000000,
342 	(0x6e00 << 16) | (0xc904 >> 2),
343 	0x00000000,
344 	(0x7e00 << 16) | (0xc904 >> 2),
345 	0x00000000,
346 	(0x8e00 << 16) | (0xc904 >> 2),
347 	0x00000000,
348 	(0x9e00 << 16) | (0xc904 >> 2),
349 	0x00000000,
350 	(0xae00 << 16) | (0xc904 >> 2),
351 	0x00000000,
352 	(0xbe00 << 16) | (0xc904 >> 2),
353 	0x00000000,
354 	(0x4e00 << 16) | (0xc908 >> 2),
355 	0x00000000,
356 	(0x5e00 << 16) | (0xc908 >> 2),
357 	0x00000000,
358 	(0x6e00 << 16) | (0xc908 >> 2),
359 	0x00000000,
360 	(0x7e00 << 16) | (0xc908 >> 2),
361 	0x00000000,
362 	(0x8e00 << 16) | (0xc908 >> 2),
363 	0x00000000,
364 	(0x9e00 << 16) | (0xc908 >> 2),
365 	0x00000000,
366 	(0xae00 << 16) | (0xc908 >> 2),
367 	0x00000000,
368 	(0xbe00 << 16) | (0xc908 >> 2),
369 	0x00000000,
370 	(0x4e00 << 16) | (0xc90c >> 2),
371 	0x00000000,
372 	(0x5e00 << 16) | (0xc90c >> 2),
373 	0x00000000,
374 	(0x6e00 << 16) | (0xc90c >> 2),
375 	0x00000000,
376 	(0x7e00 << 16) | (0xc90c >> 2),
377 	0x00000000,
378 	(0x8e00 << 16) | (0xc90c >> 2),
379 	0x00000000,
380 	(0x9e00 << 16) | (0xc90c >> 2),
381 	0x00000000,
382 	(0xae00 << 16) | (0xc90c >> 2),
383 	0x00000000,
384 	(0xbe00 << 16) | (0xc90c >> 2),
385 	0x00000000,
386 	(0x4e00 << 16) | (0xc910 >> 2),
387 	0x00000000,
388 	(0x5e00 << 16) | (0xc910 >> 2),
389 	0x00000000,
390 	(0x6e00 << 16) | (0xc910 >> 2),
391 	0x00000000,
392 	(0x7e00 << 16) | (0xc910 >> 2),
393 	0x00000000,
394 	(0x8e00 << 16) | (0xc910 >> 2),
395 	0x00000000,
396 	(0x9e00 << 16) | (0xc910 >> 2),
397 	0x00000000,
398 	(0xae00 << 16) | (0xc910 >> 2),
399 	0x00000000,
400 	(0xbe00 << 16) | (0xc910 >> 2),
401 	0x00000000,
402 	(0x0e00 << 16) | (0xc99c >> 2),
403 	0x00000000,
404 	(0x0e00 << 16) | (0x9834 >> 2),
405 	0x00000000,
406 	(0x0000 << 16) | (0x30f00 >> 2),
407 	0x00000000,
408 	(0x0001 << 16) | (0x30f00 >> 2),
409 	0x00000000,
410 	(0x0000 << 16) | (0x30f04 >> 2),
411 	0x00000000,
412 	(0x0001 << 16) | (0x30f04 >> 2),
413 	0x00000000,
414 	(0x0000 << 16) | (0x30f08 >> 2),
415 	0x00000000,
416 	(0x0001 << 16) | (0x30f08 >> 2),
417 	0x00000000,
418 	(0x0000 << 16) | (0x30f0c >> 2),
419 	0x00000000,
420 	(0x0001 << 16) | (0x30f0c >> 2),
421 	0x00000000,
422 	(0x0600 << 16) | (0x9b7c >> 2),
423 	0x00000000,
424 	(0x0e00 << 16) | (0x8a14 >> 2),
425 	0x00000000,
426 	(0x0e00 << 16) | (0x8a18 >> 2),
427 	0x00000000,
428 	(0x0600 << 16) | (0x30a00 >> 2),
429 	0x00000000,
430 	(0x0e00 << 16) | (0x8bf0 >> 2),
431 	0x00000000,
432 	(0x0e00 << 16) | (0x8bcc >> 2),
433 	0x00000000,
434 	(0x0e00 << 16) | (0x8b24 >> 2),
435 	0x00000000,
436 	(0x0e00 << 16) | (0x30a04 >> 2),
437 	0x00000000,
438 	(0x0600 << 16) | (0x30a10 >> 2),
439 	0x00000000,
440 	(0x0600 << 16) | (0x30a14 >> 2),
441 	0x00000000,
442 	(0x0600 << 16) | (0x30a18 >> 2),
443 	0x00000000,
444 	(0x0600 << 16) | (0x30a2c >> 2),
445 	0x00000000,
446 	(0x0e00 << 16) | (0xc700 >> 2),
447 	0x00000000,
448 	(0x0e00 << 16) | (0xc704 >> 2),
449 	0x00000000,
450 	(0x0e00 << 16) | (0xc708 >> 2),
451 	0x00000000,
452 	(0x0e00 << 16) | (0xc768 >> 2),
453 	0x00000000,
454 	(0x0400 << 16) | (0xc770 >> 2),
455 	0x00000000,
456 	(0x0400 << 16) | (0xc774 >> 2),
457 	0x00000000,
458 	(0x0400 << 16) | (0xc778 >> 2),
459 	0x00000000,
460 	(0x0400 << 16) | (0xc77c >> 2),
461 	0x00000000,
462 	(0x0400 << 16) | (0xc780 >> 2),
463 	0x00000000,
464 	(0x0400 << 16) | (0xc784 >> 2),
465 	0x00000000,
466 	(0x0400 << 16) | (0xc788 >> 2),
467 	0x00000000,
468 	(0x0400 << 16) | (0xc78c >> 2),
469 	0x00000000,
470 	(0x0400 << 16) | (0xc798 >> 2),
471 	0x00000000,
472 	(0x0400 << 16) | (0xc79c >> 2),
473 	0x00000000,
474 	(0x0400 << 16) | (0xc7a0 >> 2),
475 	0x00000000,
476 	(0x0400 << 16) | (0xc7a4 >> 2),
477 	0x00000000,
478 	(0x0400 << 16) | (0xc7a8 >> 2),
479 	0x00000000,
480 	(0x0400 << 16) | (0xc7ac >> 2),
481 	0x00000000,
482 	(0x0400 << 16) | (0xc7b0 >> 2),
483 	0x00000000,
484 	(0x0400 << 16) | (0xc7b4 >> 2),
485 	0x00000000,
486 	(0x0e00 << 16) | (0x9100 >> 2),
487 	0x00000000,
488 	(0x0e00 << 16) | (0x3c010 >> 2),
489 	0x00000000,
490 	(0x0e00 << 16) | (0x92a8 >> 2),
491 	0x00000000,
492 	(0x0e00 << 16) | (0x92ac >> 2),
493 	0x00000000,
494 	(0x0e00 << 16) | (0x92b4 >> 2),
495 	0x00000000,
496 	(0x0e00 << 16) | (0x92b8 >> 2),
497 	0x00000000,
498 	(0x0e00 << 16) | (0x92bc >> 2),
499 	0x00000000,
500 	(0x0e00 << 16) | (0x92c0 >> 2),
501 	0x00000000,
502 	(0x0e00 << 16) | (0x92c4 >> 2),
503 	0x00000000,
504 	(0x0e00 << 16) | (0x92c8 >> 2),
505 	0x00000000,
506 	(0x0e00 << 16) | (0x92cc >> 2),
507 	0x00000000,
508 	(0x0e00 << 16) | (0x92d0 >> 2),
509 	0x00000000,
510 	(0x0e00 << 16) | (0x8c00 >> 2),
511 	0x00000000,
512 	(0x0e00 << 16) | (0x8c04 >> 2),
513 	0x00000000,
514 	(0x0e00 << 16) | (0x8c20 >> 2),
515 	0x00000000,
516 	(0x0e00 << 16) | (0x8c38 >> 2),
517 	0x00000000,
518 	(0x0e00 << 16) | (0x8c3c >> 2),
519 	0x00000000,
520 	(0x0e00 << 16) | (0xae00 >> 2),
521 	0x00000000,
522 	(0x0e00 << 16) | (0x9604 >> 2),
523 	0x00000000,
524 	(0x0e00 << 16) | (0xac08 >> 2),
525 	0x00000000,
526 	(0x0e00 << 16) | (0xac0c >> 2),
527 	0x00000000,
528 	(0x0e00 << 16) | (0xac10 >> 2),
529 	0x00000000,
530 	(0x0e00 << 16) | (0xac14 >> 2),
531 	0x00000000,
532 	(0x0e00 << 16) | (0xac58 >> 2),
533 	0x00000000,
534 	(0x0e00 << 16) | (0xac68 >> 2),
535 	0x00000000,
536 	(0x0e00 << 16) | (0xac6c >> 2),
537 	0x00000000,
538 	(0x0e00 << 16) | (0xac70 >> 2),
539 	0x00000000,
540 	(0x0e00 << 16) | (0xac74 >> 2),
541 	0x00000000,
542 	(0x0e00 << 16) | (0xac78 >> 2),
543 	0x00000000,
544 	(0x0e00 << 16) | (0xac7c >> 2),
545 	0x00000000,
546 	(0x0e00 << 16) | (0xac80 >> 2),
547 	0x00000000,
548 	(0x0e00 << 16) | (0xac84 >> 2),
549 	0x00000000,
550 	(0x0e00 << 16) | (0xac88 >> 2),
551 	0x00000000,
552 	(0x0e00 << 16) | (0xac8c >> 2),
553 	0x00000000,
554 	(0x0e00 << 16) | (0x970c >> 2),
555 	0x00000000,
556 	(0x0e00 << 16) | (0x9714 >> 2),
557 	0x00000000,
558 	(0x0e00 << 16) | (0x9718 >> 2),
559 	0x00000000,
560 	(0x0e00 << 16) | (0x971c >> 2),
561 	0x00000000,
562 	(0x0e00 << 16) | (0x31068 >> 2),
563 	0x00000000,
564 	(0x4e00 << 16) | (0x31068 >> 2),
565 	0x00000000,
566 	(0x5e00 << 16) | (0x31068 >> 2),
567 	0x00000000,
568 	(0x6e00 << 16) | (0x31068 >> 2),
569 	0x00000000,
570 	(0x7e00 << 16) | (0x31068 >> 2),
571 	0x00000000,
572 	(0x8e00 << 16) | (0x31068 >> 2),
573 	0x00000000,
574 	(0x9e00 << 16) | (0x31068 >> 2),
575 	0x00000000,
576 	(0xae00 << 16) | (0x31068 >> 2),
577 	0x00000000,
578 	(0xbe00 << 16) | (0x31068 >> 2),
579 	0x00000000,
580 	(0x0e00 << 16) | (0xcd10 >> 2),
581 	0x00000000,
582 	(0x0e00 << 16) | (0xcd14 >> 2),
583 	0x00000000,
584 	(0x0e00 << 16) | (0x88b0 >> 2),
585 	0x00000000,
586 	(0x0e00 << 16) | (0x88b4 >> 2),
587 	0x00000000,
588 	(0x0e00 << 16) | (0x88b8 >> 2),
589 	0x00000000,
590 	(0x0e00 << 16) | (0x88bc >> 2),
591 	0x00000000,
592 	(0x0400 << 16) | (0x89c0 >> 2),
593 	0x00000000,
594 	(0x0e00 << 16) | (0x88c4 >> 2),
595 	0x00000000,
596 	(0x0e00 << 16) | (0x88c8 >> 2),
597 	0x00000000,
598 	(0x0e00 << 16) | (0x88d0 >> 2),
599 	0x00000000,
600 	(0x0e00 << 16) | (0x88d4 >> 2),
601 	0x00000000,
602 	(0x0e00 << 16) | (0x88d8 >> 2),
603 	0x00000000,
604 	(0x0e00 << 16) | (0x8980 >> 2),
605 	0x00000000,
606 	(0x0e00 << 16) | (0x30938 >> 2),
607 	0x00000000,
608 	(0x0e00 << 16) | (0x3093c >> 2),
609 	0x00000000,
610 	(0x0e00 << 16) | (0x30940 >> 2),
611 	0x00000000,
612 	(0x0e00 << 16) | (0x89a0 >> 2),
613 	0x00000000,
614 	(0x0e00 << 16) | (0x30900 >> 2),
615 	0x00000000,
616 	(0x0e00 << 16) | (0x30904 >> 2),
617 	0x00000000,
618 	(0x0e00 << 16) | (0x89b4 >> 2),
619 	0x00000000,
620 	(0x0e00 << 16) | (0x3c210 >> 2),
621 	0x00000000,
622 	(0x0e00 << 16) | (0x3c214 >> 2),
623 	0x00000000,
624 	(0x0e00 << 16) | (0x3c218 >> 2),
625 	0x00000000,
626 	(0x0e00 << 16) | (0x8904 >> 2),
627 	0x00000000,
	/* Bare word 0x5; the five entries that follow have no paired zero word. */
628 	0x5,
629 	(0x0e00 << 16) | (0x8c28 >> 2),
630 	(0x0e00 << 16) | (0x8c2c >> 2),
631 	(0x0e00 << 16) | (0x8c30 >> 2),
632 	(0x0e00 << 16) | (0x8c34 >> 2),
633 	(0x0e00 << 16) | (0x9600 >> 2),
634 };
635 
/*
 * RLC save/restore register list for "kalindi" (per the array name).
 *
 * Layout as written here:
 *  - Most entries are two words: a packed word
 *    (hi16 << 16) | (register offset >> 2), followed by a 0x00000000 word.
 *  - Two bare words (0x3 and 0x5) appear without a paired zero word.
 *  - The five entries after 0x5 are single packed words with no zero word.
 *
 * NOTE(review): the meaning of the hi16 field (0x0e00, 0x0600, 0x0400,
 * 0x4e00..0x7e00, 0x0000 are used), of the zero words and of the 0x3 / 0x5
 * words is defined by the consumer of this table, which is not visible
 * here -- confirm before editing.  The table is positional; do not reorder
 * entries.
 */
636 static const u32 kalindi_rlc_save_restore_register_list[] =
637 {
638 	(0x0e00 << 16) | (0xc12c >> 2),
639 	0x00000000,
640 	(0x0e00 << 16) | (0xc140 >> 2),
641 	0x00000000,
642 	(0x0e00 << 16) | (0xc150 >> 2),
643 	0x00000000,
644 	(0x0e00 << 16) | (0xc15c >> 2),
645 	0x00000000,
646 	(0x0e00 << 16) | (0xc168 >> 2),
647 	0x00000000,
648 	(0x0e00 << 16) | (0xc170 >> 2),
649 	0x00000000,
650 	(0x0e00 << 16) | (0xc204 >> 2),
651 	0x00000000,
652 	(0x0e00 << 16) | (0xc2b4 >> 2),
653 	0x00000000,
654 	(0x0e00 << 16) | (0xc2b8 >> 2),
655 	0x00000000,
656 	(0x0e00 << 16) | (0xc2bc >> 2),
657 	0x00000000,
658 	(0x0e00 << 16) | (0xc2c0 >> 2),
659 	0x00000000,
660 	(0x0e00 << 16) | (0x8228 >> 2),
661 	0x00000000,
662 	(0x0e00 << 16) | (0x829c >> 2),
663 	0x00000000,
664 	(0x0e00 << 16) | (0x869c >> 2),
665 	0x00000000,
666 	(0x0600 << 16) | (0x98f4 >> 2),
667 	0x00000000,
668 	(0x0e00 << 16) | (0x98f8 >> 2),
669 	0x00000000,
670 	(0x0e00 << 16) | (0x9900 >> 2),
671 	0x00000000,
672 	(0x0e00 << 16) | (0xc260 >> 2),
673 	0x00000000,
674 	(0x0e00 << 16) | (0x90e8 >> 2),
675 	0x00000000,
676 	(0x0e00 << 16) | (0x3c000 >> 2),
677 	0x00000000,
678 	(0x0e00 << 16) | (0x3c00c >> 2),
679 	0x00000000,
680 	(0x0e00 << 16) | (0x8c1c >> 2),
681 	0x00000000,
682 	(0x0e00 << 16) | (0x9700 >> 2),
683 	0x00000000,
684 	(0x0e00 << 16) | (0xcd20 >> 2),
685 	0x00000000,
686 	(0x4e00 << 16) | (0xcd20 >> 2),
687 	0x00000000,
688 	(0x5e00 << 16) | (0xcd20 >> 2),
689 	0x00000000,
690 	(0x6e00 << 16) | (0xcd20 >> 2),
691 	0x00000000,
692 	(0x7e00 << 16) | (0xcd20 >> 2),
693 	0x00000000,
694 	(0x0e00 << 16) | (0x89bc >> 2),
695 	0x00000000,
696 	(0x0e00 << 16) | (0x8900 >> 2),
697 	0x00000000,
	/* Bare word, no paired zero word. NOTE(review): meaning set by the consumer. */
698 	0x3,
699 	(0x0e00 << 16) | (0xc130 >> 2),
700 	0x00000000,
701 	(0x0e00 << 16) | (0xc134 >> 2),
702 	0x00000000,
703 	(0x0e00 << 16) | (0xc1fc >> 2),
704 	0x00000000,
705 	(0x0e00 << 16) | (0xc208 >> 2),
706 	0x00000000,
707 	(0x0e00 << 16) | (0xc264 >> 2),
708 	0x00000000,
709 	(0x0e00 << 16) | (0xc268 >> 2),
710 	0x00000000,
711 	(0x0e00 << 16) | (0xc26c >> 2),
712 	0x00000000,
713 	(0x0e00 << 16) | (0xc270 >> 2),
714 	0x00000000,
715 	(0x0e00 << 16) | (0xc274 >> 2),
716 	0x00000000,
717 	(0x0e00 << 16) | (0xc28c >> 2),
718 	0x00000000,
719 	(0x0e00 << 16) | (0xc290 >> 2),
720 	0x00000000,
721 	(0x0e00 << 16) | (0xc294 >> 2),
722 	0x00000000,
723 	(0x0e00 << 16) | (0xc298 >> 2),
724 	0x00000000,
725 	(0x0e00 << 16) | (0xc2a0 >> 2),
726 	0x00000000,
727 	(0x0e00 << 16) | (0xc2a4 >> 2),
728 	0x00000000,
729 	(0x0e00 << 16) | (0xc2a8 >> 2),
730 	0x00000000,
731 	(0x0e00 << 16) | (0xc2ac >> 2),
732 	0x00000000,
733 	(0x0e00 << 16) | (0x301d0 >> 2),
734 	0x00000000,
735 	(0x0e00 << 16) | (0x30238 >> 2),
736 	0x00000000,
737 	(0x0e00 << 16) | (0x30250 >> 2),
738 	0x00000000,
739 	(0x0e00 << 16) | (0x30254 >> 2),
740 	0x00000000,
741 	(0x0e00 << 16) | (0x30258 >> 2),
742 	0x00000000,
743 	(0x0e00 << 16) | (0x3025c >> 2),
744 	0x00000000,
745 	(0x4e00 << 16) | (0xc900 >> 2),
746 	0x00000000,
747 	(0x5e00 << 16) | (0xc900 >> 2),
748 	0x00000000,
749 	(0x6e00 << 16) | (0xc900 >> 2),
750 	0x00000000,
751 	(0x7e00 << 16) | (0xc900 >> 2),
752 	0x00000000,
753 	(0x4e00 << 16) | (0xc904 >> 2),
754 	0x00000000,
755 	(0x5e00 << 16) | (0xc904 >> 2),
756 	0x00000000,
757 	(0x6e00 << 16) | (0xc904 >> 2),
758 	0x00000000,
759 	(0x7e00 << 16) | (0xc904 >> 2),
760 	0x00000000,
761 	(0x4e00 << 16) | (0xc908 >> 2),
762 	0x00000000,
763 	(0x5e00 << 16) | (0xc908 >> 2),
764 	0x00000000,
765 	(0x6e00 << 16) | (0xc908 >> 2),
766 	0x00000000,
767 	(0x7e00 << 16) | (0xc908 >> 2),
768 	0x00000000,
769 	(0x4e00 << 16) | (0xc90c >> 2),
770 	0x00000000,
771 	(0x5e00 << 16) | (0xc90c >> 2),
772 	0x00000000,
773 	(0x6e00 << 16) | (0xc90c >> 2),
774 	0x00000000,
775 	(0x7e00 << 16) | (0xc90c >> 2),
776 	0x00000000,
777 	(0x4e00 << 16) | (0xc910 >> 2),
778 	0x00000000,
779 	(0x5e00 << 16) | (0xc910 >> 2),
780 	0x00000000,
781 	(0x6e00 << 16) | (0xc910 >> 2),
782 	0x00000000,
783 	(0x7e00 << 16) | (0xc910 >> 2),
784 	0x00000000,
785 	(0x0e00 << 16) | (0xc99c >> 2),
786 	0x00000000,
787 	(0x0e00 << 16) | (0x9834 >> 2),
788 	0x00000000,
789 	(0x0000 << 16) | (0x30f00 >> 2),
790 	0x00000000,
791 	(0x0000 << 16) | (0x30f04 >> 2),
792 	0x00000000,
793 	(0x0000 << 16) | (0x30f08 >> 2),
794 	0x00000000,
795 	(0x0000 << 16) | (0x30f0c >> 2),
796 	0x00000000,
797 	(0x0600 << 16) | (0x9b7c >> 2),
798 	0x00000000,
799 	(0x0e00 << 16) | (0x8a14 >> 2),
800 	0x00000000,
801 	(0x0e00 << 16) | (0x8a18 >> 2),
802 	0x00000000,
803 	(0x0600 << 16) | (0x30a00 >> 2),
804 	0x00000000,
805 	(0x0e00 << 16) | (0x8bf0 >> 2),
806 	0x00000000,
807 	(0x0e00 << 16) | (0x8bcc >> 2),
808 	0x00000000,
809 	(0x0e00 << 16) | (0x8b24 >> 2),
810 	0x00000000,
811 	(0x0e00 << 16) | (0x30a04 >> 2),
812 	0x00000000,
813 	(0x0600 << 16) | (0x30a10 >> 2),
814 	0x00000000,
815 	(0x0600 << 16) | (0x30a14 >> 2),
816 	0x00000000,
817 	(0x0600 << 16) | (0x30a18 >> 2),
818 	0x00000000,
819 	(0x0600 << 16) | (0x30a2c >> 2),
820 	0x00000000,
821 	(0x0e00 << 16) | (0xc700 >> 2),
822 	0x00000000,
823 	(0x0e00 << 16) | (0xc704 >> 2),
824 	0x00000000,
825 	(0x0e00 << 16) | (0xc708 >> 2),
826 	0x00000000,
827 	(0x0e00 << 16) | (0xc768 >> 2),
828 	0x00000000,
829 	(0x0400 << 16) | (0xc770 >> 2),
830 	0x00000000,
831 	(0x0400 << 16) | (0xc774 >> 2),
832 	0x00000000,
833 	(0x0400 << 16) | (0xc798 >> 2),
834 	0x00000000,
835 	(0x0400 << 16) | (0xc79c >> 2),
836 	0x00000000,
837 	(0x0e00 << 16) | (0x9100 >> 2),
838 	0x00000000,
839 	(0x0e00 << 16) | (0x3c010 >> 2),
840 	0x00000000,
841 	(0x0e00 << 16) | (0x8c00 >> 2),
842 	0x00000000,
843 	(0x0e00 << 16) | (0x8c04 >> 2),
844 	0x00000000,
845 	(0x0e00 << 16) | (0x8c20 >> 2),
846 	0x00000000,
847 	(0x0e00 << 16) | (0x8c38 >> 2),
848 	0x00000000,
849 	(0x0e00 << 16) | (0x8c3c >> 2),
850 	0x00000000,
851 	(0x0e00 << 16) | (0xae00 >> 2),
852 	0x00000000,
853 	(0x0e00 << 16) | (0x9604 >> 2),
854 	0x00000000,
855 	(0x0e00 << 16) | (0xac08 >> 2),
856 	0x00000000,
857 	(0x0e00 << 16) | (0xac0c >> 2),
858 	0x00000000,
859 	(0x0e00 << 16) | (0xac10 >> 2),
860 	0x00000000,
861 	(0x0e00 << 16) | (0xac14 >> 2),
862 	0x00000000,
863 	(0x0e00 << 16) | (0xac58 >> 2),
864 	0x00000000,
865 	(0x0e00 << 16) | (0xac68 >> 2),
866 	0x00000000,
867 	(0x0e00 << 16) | (0xac6c >> 2),
868 	0x00000000,
869 	(0x0e00 << 16) | (0xac70 >> 2),
870 	0x00000000,
871 	(0x0e00 << 16) | (0xac74 >> 2),
872 	0x00000000,
873 	(0x0e00 << 16) | (0xac78 >> 2),
874 	0x00000000,
875 	(0x0e00 << 16) | (0xac7c >> 2),
876 	0x00000000,
877 	(0x0e00 << 16) | (0xac80 >> 2),
878 	0x00000000,
879 	(0x0e00 << 16) | (0xac84 >> 2),
880 	0x00000000,
881 	(0x0e00 << 16) | (0xac88 >> 2),
882 	0x00000000,
883 	(0x0e00 << 16) | (0xac8c >> 2),
884 	0x00000000,
885 	(0x0e00 << 16) | (0x970c >> 2),
886 	0x00000000,
887 	(0x0e00 << 16) | (0x9714 >> 2),
888 	0x00000000,
889 	(0x0e00 << 16) | (0x9718 >> 2),
890 	0x00000000,
891 	(0x0e00 << 16) | (0x971c >> 2),
892 	0x00000000,
893 	(0x0e00 << 16) | (0x31068 >> 2),
894 	0x00000000,
895 	(0x4e00 << 16) | (0x31068 >> 2),
896 	0x00000000,
897 	(0x5e00 << 16) | (0x31068 >> 2),
898 	0x00000000,
899 	(0x6e00 << 16) | (0x31068 >> 2),
900 	0x00000000,
901 	(0x7e00 << 16) | (0x31068 >> 2),
902 	0x00000000,
903 	(0x0e00 << 16) | (0xcd10 >> 2),
904 	0x00000000,
905 	(0x0e00 << 16) | (0xcd14 >> 2),
906 	0x00000000,
907 	(0x0e00 << 16) | (0x88b0 >> 2),
908 	0x00000000,
909 	(0x0e00 << 16) | (0x88b4 >> 2),
910 	0x00000000,
911 	(0x0e00 << 16) | (0x88b8 >> 2),
912 	0x00000000,
913 	(0x0e00 << 16) | (0x88bc >> 2),
914 	0x00000000,
915 	(0x0400 << 16) | (0x89c0 >> 2),
916 	0x00000000,
917 	(0x0e00 << 16) | (0x88c4 >> 2),
918 	0x00000000,
919 	(0x0e00 << 16) | (0x88c8 >> 2),
920 	0x00000000,
921 	(0x0e00 << 16) | (0x88d0 >> 2),
922 	0x00000000,
923 	(0x0e00 << 16) | (0x88d4 >> 2),
924 	0x00000000,
925 	(0x0e00 << 16) | (0x88d8 >> 2),
926 	0x00000000,
927 	(0x0e00 << 16) | (0x8980 >> 2),
928 	0x00000000,
929 	(0x0e00 << 16) | (0x30938 >> 2),
930 	0x00000000,
931 	(0x0e00 << 16) | (0x3093c >> 2),
932 	0x00000000,
933 	(0x0e00 << 16) | (0x30940 >> 2),
934 	0x00000000,
935 	(0x0e00 << 16) | (0x89a0 >> 2),
936 	0x00000000,
937 	(0x0e00 << 16) | (0x30900 >> 2),
938 	0x00000000,
939 	(0x0e00 << 16) | (0x30904 >> 2),
940 	0x00000000,
941 	(0x0e00 << 16) | (0x89b4 >> 2),
942 	0x00000000,
943 	(0x0e00 << 16) | (0x3e1fc >> 2),
944 	0x00000000,
945 	(0x0e00 << 16) | (0x3c210 >> 2),
946 	0x00000000,
947 	(0x0e00 << 16) | (0x3c214 >> 2),
948 	0x00000000,
949 	(0x0e00 << 16) | (0x3c218 >> 2),
950 	0x00000000,
951 	(0x0e00 << 16) | (0x8904 >> 2),
952 	0x00000000,
	/* Bare word 0x5; the five entries that follow have no paired zero word. */
953 	0x5,
954 	(0x0e00 << 16) | (0x8c28 >> 2),
955 	(0x0e00 << 16) | (0x8c2c >> 2),
956 	(0x0e00 << 16) | (0x8c30 >> 2),
957 	(0x0e00 << 16) | (0x8c34 >> 2),
958 	(0x0e00 << 16) | (0x9600 >> 2),
959 };
960 
/*
 * Bonaire "golden" SPM register setting: a single row of three u32 words.
 * NOTE(review): the row presumably reads {register offset, mask, value} and
 * is applied by a register-programming helper outside this view -- confirm.
 */
961 static const u32 bonaire_golden_spm_registers[] =
962 {
963 	0x30800, 0xe0ffffff, 0xe0000000
964 };
965 
/*
 * Bonaire "golden" common register settings: four rows of three u32 words.
 * NOTE(review): rows presumably read {register offset, mask, value} and are
 * applied by a register-programming helper outside this view -- confirm.
 */
966 static const u32 bonaire_golden_common_registers[] =
967 {
968 	0xc770, 0xffffffff, 0x00000800,
969 	0xc774, 0xffffffff, 0x00000800,
970 	0xc798, 0xffffffff, 0x00007fbf,
971 	0xc79c, 0xffffffff, 0x00007faf
972 };
973 
/*
 * Bonaire "golden" register settings, written as rows of three u32 words.
 * NOTE(review): rows presumably read {register offset, mask, value} and are
 * applied in order by a register-programming helper outside this view --
 * confirm before reordering or merging rows.
 */
974 static const u32 bonaire_golden_registers[] =
975 {
976 	0x3354, 0x00000333, 0x00000333,
977 	0x3350, 0x000c0fc0, 0x00040200,
978 	0x9a10, 0x00010000, 0x00058208,
979 	0x3c000, 0xffff1fff, 0x00140000,
980 	0x3c200, 0xfdfc0fff, 0x00000100,
981 	0x3c234, 0x40000000, 0x40000200,
982 	0x9830, 0xffffffff, 0x00000000,
983 	0x9834, 0xf00fffff, 0x00000400,
984 	0x9838, 0x0002021c, 0x00020200,
985 	0xc78, 0x00000080, 0x00000000,
986 	0x5bb0, 0x000000f0, 0x00000070,
987 	0x5bc0, 0xf0311fff, 0x80300000,
988 	0x98f8, 0x73773777, 0x12010001,
989 	0x350c, 0x00810000, 0x408af000,
990 	0x7030, 0x31000111, 0x00000011,
991 	0x2f48, 0x73773777, 0x12010001,
992 	0x220c, 0x00007fb6, 0x0021a1b1,
993 	0x2210, 0x00007fb6, 0x002021b1,
994 	0x2180, 0x00007fb6, 0x00002191,
995 	0x2218, 0x00007fb6, 0x002121b1,
996 	0x221c, 0x00007fb6, 0x002021b1,
997 	0x21dc, 0x00007fb6, 0x00002191,
998 	0x21e0, 0x00007fb6, 0x00002191,
999 	0x3628, 0x0000003f, 0x0000000a,
1000 	0x362c, 0x0000003f, 0x0000000a,
1001 	0x2ae4, 0x00073ffe, 0x000022a2,
1002 	0x240c, 0x000007ff, 0x00000000,
1003 	0x8a14, 0xf000003f, 0x00000007,
1004 	0x8bf0, 0x00002001, 0x00000001,
1005 	0x8b24, 0xffffffff, 0x00ffffff,
1006 	0x30a04, 0x0000ff0f, 0x00000000,
1007 	0x28a4c, 0x07ffffff, 0x06000000,
1008 	0x4d8, 0x00000fff, 0x00000100,
1009 	0x3e78, 0x00000001, 0x00000002,
1010 	0x9100, 0x03000000, 0x0362c688,
1011 	0x8c00, 0x000000ff, 0x00000001,
1012 	0xe40, 0x00001fff, 0x00001fff,
1013 	0x9060, 0x0000007f, 0x00000020,
1014 	0x9508, 0x00010000, 0x00010000,
1015 	0xac14, 0x000003ff, 0x000000f3,
1016 	0xac0c, 0xffffffff, 0x00001032
1017 };
1018 
/*
 * Bonaire "mgcg_cgcg" init settings, written as rows of three u32 words.
 * Some offsets (0x30800, 0x55e4) appear in more than one row.
 *
 * NOTE(review): rows presumably read {register offset, mask, value} and are
 * applied in order by a register-programming helper outside this view; the
 * name suggests clock-gating initialisation -- confirm.  Because offsets
 * repeat, row order may matter; do not reorder or de-duplicate.
 */
1019 static const u32 bonaire_mgcg_cgcg_init[] =
1020 {
1021 	0xc420, 0xffffffff, 0xfffffffc,
1022 	0x30800, 0xffffffff, 0xe0000000,
1023 	0x3c2a0, 0xffffffff, 0x00000100,
1024 	0x3c208, 0xffffffff, 0x00000100,
1025 	0x3c2c0, 0xffffffff, 0xc0000100,
1026 	0x3c2c8, 0xffffffff, 0xc0000100,
1027 	0x3c2c4, 0xffffffff, 0xc0000100,
1028 	0x55e4, 0xffffffff, 0x00600100,
1029 	0x3c280, 0xffffffff, 0x00000100,
1030 	0x3c214, 0xffffffff, 0x06000100,
1031 	0x3c220, 0xffffffff, 0x00000100,
1032 	0x3c218, 0xffffffff, 0x06000100,
1033 	0x3c204, 0xffffffff, 0x00000100,
1034 	0x3c2e0, 0xffffffff, 0x00000100,
1035 	0x3c224, 0xffffffff, 0x00000100,
1036 	0x3c200, 0xffffffff, 0x00000100,
1037 	0x3c230, 0xffffffff, 0x00000100,
1038 	0x3c234, 0xffffffff, 0x00000100,
1039 	0x3c250, 0xffffffff, 0x00000100,
1040 	0x3c254, 0xffffffff, 0x00000100,
1041 	0x3c258, 0xffffffff, 0x00000100,
1042 	0x3c25c, 0xffffffff, 0x00000100,
1043 	0x3c260, 0xffffffff, 0x00000100,
1044 	0x3c27c, 0xffffffff, 0x00000100,
1045 	0x3c278, 0xffffffff, 0x00000100,
1046 	0x3c210, 0xffffffff, 0x06000100,
1047 	0x3c290, 0xffffffff, 0x00000100,
1048 	0x3c274, 0xffffffff, 0x00000100,
1049 	0x3c2b4, 0xffffffff, 0x00000100,
1050 	0x3c2b0, 0xffffffff, 0x00000100,
1051 	0x3c270, 0xffffffff, 0x00000100,
1052 	0x30800, 0xffffffff, 0xe0000000,
1053 	0x3c020, 0xffffffff, 0x00010000,
1054 	0x3c024, 0xffffffff, 0x00030002,
1055 	0x3c028, 0xffffffff, 0x00040007,
1056 	0x3c02c, 0xffffffff, 0x00060005,
1057 	0x3c030, 0xffffffff, 0x00090008,
1058 	0x3c034, 0xffffffff, 0x00010000,
1059 	0x3c038, 0xffffffff, 0x00030002,
1060 	0x3c03c, 0xffffffff, 0x00040007,
1061 	0x3c040, 0xffffffff, 0x00060005,
1062 	0x3c044, 0xffffffff, 0x00090008,
1063 	0x3c048, 0xffffffff, 0x00010000,
1064 	0x3c04c, 0xffffffff, 0x00030002,
1065 	0x3c050, 0xffffffff, 0x00040007,
1066 	0x3c054, 0xffffffff, 0x00060005,
1067 	0x3c058, 0xffffffff, 0x00090008,
1068 	0x3c05c, 0xffffffff, 0x00010000,
1069 	0x3c060, 0xffffffff, 0x00030002,
1070 	0x3c064, 0xffffffff, 0x00040007,
1071 	0x3c068, 0xffffffff, 0x00060005,
1072 	0x3c06c, 0xffffffff, 0x00090008,
1073 	0x3c070, 0xffffffff, 0x00010000,
1074 	0x3c074, 0xffffffff, 0x00030002,
1075 	0x3c078, 0xffffffff, 0x00040007,
1076 	0x3c07c, 0xffffffff, 0x00060005,
1077 	0x3c080, 0xffffffff, 0x00090008,
1078 	0x3c084, 0xffffffff, 0x00010000,
1079 	0x3c088, 0xffffffff, 0x00030002,
1080 	0x3c08c, 0xffffffff, 0x00040007,
1081 	0x3c090, 0xffffffff, 0x00060005,
1082 	0x3c094, 0xffffffff, 0x00090008,
1083 	0x3c098, 0xffffffff, 0x00010000,
1084 	0x3c09c, 0xffffffff, 0x00030002,
1085 	0x3c0a0, 0xffffffff, 0x00040007,
1086 	0x3c0a4, 0xffffffff, 0x00060005,
1087 	0x3c0a8, 0xffffffff, 0x00090008,
1088 	0x3c000, 0xffffffff, 0x96e00200,
1089 	0x8708, 0xffffffff, 0x00900100,
1090 	0xc424, 0xffffffff, 0x0020003f,
1091 	0x38, 0xffffffff, 0x0140001c,
1092 	0x3c, 0x000f0000, 0x000f0000,
1093 	0x220, 0xffffffff, 0xC060000C,
1094 	0x224, 0xc0000fff, 0x00000100,
1095 	0xf90, 0xffffffff, 0x00000100,
1096 	0xf98, 0x00000101, 0x00000000,
1097 	0x20a8, 0xffffffff, 0x00000104,
1098 	0x55e4, 0xff000fff, 0x00000100,
1099 	0x30cc, 0xc0000fff, 0x00000104,
1100 	0xc1e4, 0x00000001, 0x00000001,
1101 	0xd00c, 0xff000ff0, 0x00000100,
1102 	0xd80c, 0xff000ff0, 0x00000100
1103 };
1104 
1105 static const u32 spectre_golden_spm_registers[] =
1106 {
1107 	0x30800, 0xe0ffffff, 0xe0000000
1108 };
1109 
1110 static const u32 spectre_golden_common_registers[] =
1111 {
1112 	0xc770, 0xffffffff, 0x00000800,
1113 	0xc774, 0xffffffff, 0x00000800,
1114 	0xc798, 0xffffffff, 0x00007fbf,
1115 	0xc79c, 0xffffffff, 0x00007faf
1116 };
1117 
1118 static const u32 spectre_golden_registers[] =
1119 {
1120 	0x3c000, 0xffff1fff, 0x96940200,
1121 	0x3c00c, 0xffff0001, 0xff000000,
1122 	0x3c200, 0xfffc0fff, 0x00000100,
1123 	0x6ed8, 0x00010101, 0x00010000,
1124 	0x9834, 0xf00fffff, 0x00000400,
1125 	0x9838, 0xfffffffc, 0x00020200,
1126 	0x5bb0, 0x000000f0, 0x00000070,
1127 	0x5bc0, 0xf0311fff, 0x80300000,
1128 	0x98f8, 0x73773777, 0x12010001,
1129 	0x9b7c, 0x00ff0000, 0x00fc0000,
1130 	0x2f48, 0x73773777, 0x12010001,
1131 	0x8a14, 0xf000003f, 0x00000007,
1132 	0x8b24, 0xffffffff, 0x00ffffff,
1133 	0x28350, 0x3f3f3fff, 0x00000082,
1134 	0x28354, 0x0000003f, 0x00000000,
1135 	0x3e78, 0x00000001, 0x00000002,
1136 	0x913c, 0xffff03df, 0x00000004,
1137 	0xc768, 0x00000008, 0x00000008,
1138 	0x8c00, 0x000008ff, 0x00000800,
1139 	0x9508, 0x00010000, 0x00010000,
1140 	0xac0c, 0xffffffff, 0x54763210,
1141 	0x214f8, 0x01ff01ff, 0x00000002,
1142 	0x21498, 0x007ff800, 0x00200000,
1143 	0x2015c, 0xffffffff, 0x00000f40,
1144 	0x30934, 0xffffffff, 0x00000001
1145 };
1146 
1147 static const u32 spectre_mgcg_cgcg_init[] =
1148 {
1149 	0xc420, 0xffffffff, 0xfffffffc,
1150 	0x30800, 0xffffffff, 0xe0000000,
1151 	0x3c2a0, 0xffffffff, 0x00000100,
1152 	0x3c208, 0xffffffff, 0x00000100,
1153 	0x3c2c0, 0xffffffff, 0x00000100,
1154 	0x3c2c8, 0xffffffff, 0x00000100,
1155 	0x3c2c4, 0xffffffff, 0x00000100,
1156 	0x55e4, 0xffffffff, 0x00600100,
1157 	0x3c280, 0xffffffff, 0x00000100,
1158 	0x3c214, 0xffffffff, 0x06000100,
1159 	0x3c220, 0xffffffff, 0x00000100,
1160 	0x3c218, 0xffffffff, 0x06000100,
1161 	0x3c204, 0xffffffff, 0x00000100,
1162 	0x3c2e0, 0xffffffff, 0x00000100,
1163 	0x3c224, 0xffffffff, 0x00000100,
1164 	0x3c200, 0xffffffff, 0x00000100,
1165 	0x3c230, 0xffffffff, 0x00000100,
1166 	0x3c234, 0xffffffff, 0x00000100,
1167 	0x3c250, 0xffffffff, 0x00000100,
1168 	0x3c254, 0xffffffff, 0x00000100,
1169 	0x3c258, 0xffffffff, 0x00000100,
1170 	0x3c25c, 0xffffffff, 0x00000100,
1171 	0x3c260, 0xffffffff, 0x00000100,
1172 	0x3c27c, 0xffffffff, 0x00000100,
1173 	0x3c278, 0xffffffff, 0x00000100,
1174 	0x3c210, 0xffffffff, 0x06000100,
1175 	0x3c290, 0xffffffff, 0x00000100,
1176 	0x3c274, 0xffffffff, 0x00000100,
1177 	0x3c2b4, 0xffffffff, 0x00000100,
1178 	0x3c2b0, 0xffffffff, 0x00000100,
1179 	0x3c270, 0xffffffff, 0x00000100,
1180 	0x30800, 0xffffffff, 0xe0000000,
1181 	0x3c020, 0xffffffff, 0x00010000,
1182 	0x3c024, 0xffffffff, 0x00030002,
1183 	0x3c028, 0xffffffff, 0x00040007,
1184 	0x3c02c, 0xffffffff, 0x00060005,
1185 	0x3c030, 0xffffffff, 0x00090008,
1186 	0x3c034, 0xffffffff, 0x00010000,
1187 	0x3c038, 0xffffffff, 0x00030002,
1188 	0x3c03c, 0xffffffff, 0x00040007,
1189 	0x3c040, 0xffffffff, 0x00060005,
1190 	0x3c044, 0xffffffff, 0x00090008,
1191 	0x3c048, 0xffffffff, 0x00010000,
1192 	0x3c04c, 0xffffffff, 0x00030002,
1193 	0x3c050, 0xffffffff, 0x00040007,
1194 	0x3c054, 0xffffffff, 0x00060005,
1195 	0x3c058, 0xffffffff, 0x00090008,
1196 	0x3c05c, 0xffffffff, 0x00010000,
1197 	0x3c060, 0xffffffff, 0x00030002,
1198 	0x3c064, 0xffffffff, 0x00040007,
1199 	0x3c068, 0xffffffff, 0x00060005,
1200 	0x3c06c, 0xffffffff, 0x00090008,
1201 	0x3c070, 0xffffffff, 0x00010000,
1202 	0x3c074, 0xffffffff, 0x00030002,
1203 	0x3c078, 0xffffffff, 0x00040007,
1204 	0x3c07c, 0xffffffff, 0x00060005,
1205 	0x3c080, 0xffffffff, 0x00090008,
1206 	0x3c084, 0xffffffff, 0x00010000,
1207 	0x3c088, 0xffffffff, 0x00030002,
1208 	0x3c08c, 0xffffffff, 0x00040007,
1209 	0x3c090, 0xffffffff, 0x00060005,
1210 	0x3c094, 0xffffffff, 0x00090008,
1211 	0x3c098, 0xffffffff, 0x00010000,
1212 	0x3c09c, 0xffffffff, 0x00030002,
1213 	0x3c0a0, 0xffffffff, 0x00040007,
1214 	0x3c0a4, 0xffffffff, 0x00060005,
1215 	0x3c0a8, 0xffffffff, 0x00090008,
1216 	0x3c0ac, 0xffffffff, 0x00010000,
1217 	0x3c0b0, 0xffffffff, 0x00030002,
1218 	0x3c0b4, 0xffffffff, 0x00040007,
1219 	0x3c0b8, 0xffffffff, 0x00060005,
1220 	0x3c0bc, 0xffffffff, 0x00090008,
1221 	0x3c000, 0xffffffff, 0x96e00200,
1222 	0x8708, 0xffffffff, 0x00900100,
1223 	0xc424, 0xffffffff, 0x0020003f,
1224 	0x38, 0xffffffff, 0x0140001c,
1225 	0x3c, 0x000f0000, 0x000f0000,
1226 	0x220, 0xffffffff, 0xC060000C,
1227 	0x224, 0xc0000fff, 0x00000100,
1228 	0xf90, 0xffffffff, 0x00000100,
1229 	0xf98, 0x00000101, 0x00000000,
1230 	0x20a8, 0xffffffff, 0x00000104,
1231 	0x55e4, 0xff000fff, 0x00000100,
1232 	0x30cc, 0xc0000fff, 0x00000104,
1233 	0xc1e4, 0x00000001, 0x00000001,
1234 	0xd00c, 0xff000ff0, 0x00000100,
1235 	0xd80c, 0xff000ff0, 0x00000100
1236 };
1237 
1238 static const u32 kalindi_golden_spm_registers[] =
1239 {
1240 	0x30800, 0xe0ffffff, 0xe0000000
1241 };
1242 
1243 static const u32 kalindi_golden_common_registers[] =
1244 {
1245 	0xc770, 0xffffffff, 0x00000800,
1246 	0xc774, 0xffffffff, 0x00000800,
1247 	0xc798, 0xffffffff, 0x00007fbf,
1248 	0xc79c, 0xffffffff, 0x00007faf
1249 };
1250 
1251 static const u32 kalindi_golden_registers[] =
1252 {
1253 	0x3c000, 0xffffdfff, 0x6e944040,
1254 	0x55e4, 0xff607fff, 0xfc000100,
1255 	0x3c220, 0xff000fff, 0x00000100,
1256 	0x3c224, 0xff000fff, 0x00000100,
1257 	0x3c200, 0xfffc0fff, 0x00000100,
1258 	0x6ed8, 0x00010101, 0x00010000,
1259 	0x9830, 0xffffffff, 0x00000000,
1260 	0x9834, 0xf00fffff, 0x00000400,
1261 	0x5bb0, 0x000000f0, 0x00000070,
1262 	0x5bc0, 0xf0311fff, 0x80300000,
1263 	0x98f8, 0x73773777, 0x12010001,
1264 	0x98fc, 0xffffffff, 0x00000010,
1265 	0x9b7c, 0x00ff0000, 0x00fc0000,
1266 	0x8030, 0x00001f0f, 0x0000100a,
1267 	0x2f48, 0x73773777, 0x12010001,
1268 	0x2408, 0x000fffff, 0x000c007f,
1269 	0x8a14, 0xf000003f, 0x00000007,
1270 	0x8b24, 0x3fff3fff, 0x00ffcfff,
1271 	0x30a04, 0x0000ff0f, 0x00000000,
1272 	0x28a4c, 0x07ffffff, 0x06000000,
1273 	0x4d8, 0x00000fff, 0x00000100,
1274 	0x3e78, 0x00000001, 0x00000002,
1275 	0xc768, 0x00000008, 0x00000008,
1276 	0x8c00, 0x000000ff, 0x00000003,
1277 	0x214f8, 0x01ff01ff, 0x00000002,
1278 	0x21498, 0x007ff800, 0x00200000,
1279 	0x2015c, 0xffffffff, 0x00000f40,
1280 	0x88c4, 0x001f3ae3, 0x00000082,
1281 	0x88d4, 0x0000001f, 0x00000010,
1282 	0x30934, 0xffffffff, 0x00000000
1283 };
1284 
1285 static const u32 kalindi_mgcg_cgcg_init[] =
1286 {
1287 	0xc420, 0xffffffff, 0xfffffffc,
1288 	0x30800, 0xffffffff, 0xe0000000,
1289 	0x3c2a0, 0xffffffff, 0x00000100,
1290 	0x3c208, 0xffffffff, 0x00000100,
1291 	0x3c2c0, 0xffffffff, 0x00000100,
1292 	0x3c2c8, 0xffffffff, 0x00000100,
1293 	0x3c2c4, 0xffffffff, 0x00000100,
1294 	0x55e4, 0xffffffff, 0x00600100,
1295 	0x3c280, 0xffffffff, 0x00000100,
1296 	0x3c214, 0xffffffff, 0x06000100,
1297 	0x3c220, 0xffffffff, 0x00000100,
1298 	0x3c218, 0xffffffff, 0x06000100,
1299 	0x3c204, 0xffffffff, 0x00000100,
1300 	0x3c2e0, 0xffffffff, 0x00000100,
1301 	0x3c224, 0xffffffff, 0x00000100,
1302 	0x3c200, 0xffffffff, 0x00000100,
1303 	0x3c230, 0xffffffff, 0x00000100,
1304 	0x3c234, 0xffffffff, 0x00000100,
1305 	0x3c250, 0xffffffff, 0x00000100,
1306 	0x3c254, 0xffffffff, 0x00000100,
1307 	0x3c258, 0xffffffff, 0x00000100,
1308 	0x3c25c, 0xffffffff, 0x00000100,
1309 	0x3c260, 0xffffffff, 0x00000100,
1310 	0x3c27c, 0xffffffff, 0x00000100,
1311 	0x3c278, 0xffffffff, 0x00000100,
1312 	0x3c210, 0xffffffff, 0x06000100,
1313 	0x3c290, 0xffffffff, 0x00000100,
1314 	0x3c274, 0xffffffff, 0x00000100,
1315 	0x3c2b4, 0xffffffff, 0x00000100,
1316 	0x3c2b0, 0xffffffff, 0x00000100,
1317 	0x3c270, 0xffffffff, 0x00000100,
1318 	0x30800, 0xffffffff, 0xe0000000,
1319 	0x3c020, 0xffffffff, 0x00010000,
1320 	0x3c024, 0xffffffff, 0x00030002,
1321 	0x3c028, 0xffffffff, 0x00040007,
1322 	0x3c02c, 0xffffffff, 0x00060005,
1323 	0x3c030, 0xffffffff, 0x00090008,
1324 	0x3c034, 0xffffffff, 0x00010000,
1325 	0x3c038, 0xffffffff, 0x00030002,
1326 	0x3c03c, 0xffffffff, 0x00040007,
1327 	0x3c040, 0xffffffff, 0x00060005,
1328 	0x3c044, 0xffffffff, 0x00090008,
1329 	0x3c000, 0xffffffff, 0x96e00200,
1330 	0x8708, 0xffffffff, 0x00900100,
1331 	0xc424, 0xffffffff, 0x0020003f,
1332 	0x38, 0xffffffff, 0x0140001c,
1333 	0x3c, 0x000f0000, 0x000f0000,
1334 	0x220, 0xffffffff, 0xC060000C,
1335 	0x224, 0xc0000fff, 0x00000100,
1336 	0x20a8, 0xffffffff, 0x00000104,
1337 	0x55e4, 0xff000fff, 0x00000100,
1338 	0x30cc, 0xc0000fff, 0x00000104,
1339 	0xc1e4, 0x00000001, 0x00000001,
1340 	0xd00c, 0xff000ff0, 0x00000100,
1341 	0xd80c, 0xff000ff0, 0x00000100
1342 };
1343 
1344 static const u32 hawaii_golden_spm_registers[] =
1345 {
1346 	0x30800, 0xe0ffffff, 0xe0000000
1347 };
1348 
1349 static const u32 hawaii_golden_common_registers[] =
1350 {
1351 	0x30800, 0xffffffff, 0xe0000000,
1352 	0x28350, 0xffffffff, 0x3a00161a,
1353 	0x28354, 0xffffffff, 0x0000002e,
1354 	0x9a10, 0xffffffff, 0x00018208,
1355 	0x98f8, 0xffffffff, 0x12011003
1356 };
1357 
1358 static const u32 hawaii_golden_registers[] =
1359 {
1360 	0x3354, 0x00000333, 0x00000333,
1361 	0x9a10, 0x00010000, 0x00058208,
1362 	0x9830, 0xffffffff, 0x00000000,
1363 	0x9834, 0xf00fffff, 0x00000400,
1364 	0x9838, 0x0002021c, 0x00020200,
1365 	0xc78, 0x00000080, 0x00000000,
1366 	0x5bb0, 0x000000f0, 0x00000070,
1367 	0x5bc0, 0xf0311fff, 0x80300000,
1368 	0x350c, 0x00810000, 0x408af000,
1369 	0x7030, 0x31000111, 0x00000011,
1370 	0x2f48, 0x73773777, 0x12010001,
1371 	0x2120, 0x0000007f, 0x0000001b,
1372 	0x21dc, 0x00007fb6, 0x00002191,
1373 	0x3628, 0x0000003f, 0x0000000a,
1374 	0x362c, 0x0000003f, 0x0000000a,
1375 	0x2ae4, 0x00073ffe, 0x000022a2,
1376 	0x240c, 0x000007ff, 0x00000000,
1377 	0x8bf0, 0x00002001, 0x00000001,
1378 	0x8b24, 0xffffffff, 0x00ffffff,
1379 	0x30a04, 0x0000ff0f, 0x00000000,
1380 	0x28a4c, 0x07ffffff, 0x06000000,
1381 	0x3e78, 0x00000001, 0x00000002,
1382 	0xc768, 0x00000008, 0x00000008,
1383 	0xc770, 0x00000f00, 0x00000800,
1384 	0xc774, 0x00000f00, 0x00000800,
1385 	0xc798, 0x00ffffff, 0x00ff7fbf,
1386 	0xc79c, 0x00ffffff, 0x00ff7faf,
1387 	0x8c00, 0x000000ff, 0x00000800,
1388 	0xe40, 0x00001fff, 0x00001fff,
1389 	0x9060, 0x0000007f, 0x00000020,
1390 	0x9508, 0x00010000, 0x00010000,
1391 	0xae00, 0x00100000, 0x000ff07c,
1392 	0xac14, 0x000003ff, 0x0000000f,
1393 	0xac10, 0xffffffff, 0x7564fdec,
1394 	0xac0c, 0xffffffff, 0x3120b9a8,
1395 	0xac08, 0x20000000, 0x0f9c0000
1396 };
1397 
1398 static const u32 hawaii_mgcg_cgcg_init[] =
1399 {
1400 	0xc420, 0xffffffff, 0xfffffffd,
1401 	0x30800, 0xffffffff, 0xe0000000,
1402 	0x3c2a0, 0xffffffff, 0x00000100,
1403 	0x3c208, 0xffffffff, 0x00000100,
1404 	0x3c2c0, 0xffffffff, 0x00000100,
1405 	0x3c2c8, 0xffffffff, 0x00000100,
1406 	0x3c2c4, 0xffffffff, 0x00000100,
1407 	0x55e4, 0xffffffff, 0x00200100,
1408 	0x3c280, 0xffffffff, 0x00000100,
1409 	0x3c214, 0xffffffff, 0x06000100,
1410 	0x3c220, 0xffffffff, 0x00000100,
1411 	0x3c218, 0xffffffff, 0x06000100,
1412 	0x3c204, 0xffffffff, 0x00000100,
1413 	0x3c2e0, 0xffffffff, 0x00000100,
1414 	0x3c224, 0xffffffff, 0x00000100,
1415 	0x3c200, 0xffffffff, 0x00000100,
1416 	0x3c230, 0xffffffff, 0x00000100,
1417 	0x3c234, 0xffffffff, 0x00000100,
1418 	0x3c250, 0xffffffff, 0x00000100,
1419 	0x3c254, 0xffffffff, 0x00000100,
1420 	0x3c258, 0xffffffff, 0x00000100,
1421 	0x3c25c, 0xffffffff, 0x00000100,
1422 	0x3c260, 0xffffffff, 0x00000100,
1423 	0x3c27c, 0xffffffff, 0x00000100,
1424 	0x3c278, 0xffffffff, 0x00000100,
1425 	0x3c210, 0xffffffff, 0x06000100,
1426 	0x3c290, 0xffffffff, 0x00000100,
1427 	0x3c274, 0xffffffff, 0x00000100,
1428 	0x3c2b4, 0xffffffff, 0x00000100,
1429 	0x3c2b0, 0xffffffff, 0x00000100,
1430 	0x3c270, 0xffffffff, 0x00000100,
1431 	0x30800, 0xffffffff, 0xe0000000,
1432 	0x3c020, 0xffffffff, 0x00010000,
1433 	0x3c024, 0xffffffff, 0x00030002,
1434 	0x3c028, 0xffffffff, 0x00040007,
1435 	0x3c02c, 0xffffffff, 0x00060005,
1436 	0x3c030, 0xffffffff, 0x00090008,
1437 	0x3c034, 0xffffffff, 0x00010000,
1438 	0x3c038, 0xffffffff, 0x00030002,
1439 	0x3c03c, 0xffffffff, 0x00040007,
1440 	0x3c040, 0xffffffff, 0x00060005,
1441 	0x3c044, 0xffffffff, 0x00090008,
1442 	0x3c048, 0xffffffff, 0x00010000,
1443 	0x3c04c, 0xffffffff, 0x00030002,
1444 	0x3c050, 0xffffffff, 0x00040007,
1445 	0x3c054, 0xffffffff, 0x00060005,
1446 	0x3c058, 0xffffffff, 0x00090008,
1447 	0x3c05c, 0xffffffff, 0x00010000,
1448 	0x3c060, 0xffffffff, 0x00030002,
1449 	0x3c064, 0xffffffff, 0x00040007,
1450 	0x3c068, 0xffffffff, 0x00060005,
1451 	0x3c06c, 0xffffffff, 0x00090008,
1452 	0x3c070, 0xffffffff, 0x00010000,
1453 	0x3c074, 0xffffffff, 0x00030002,
1454 	0x3c078, 0xffffffff, 0x00040007,
1455 	0x3c07c, 0xffffffff, 0x00060005,
1456 	0x3c080, 0xffffffff, 0x00090008,
1457 	0x3c084, 0xffffffff, 0x00010000,
1458 	0x3c088, 0xffffffff, 0x00030002,
1459 	0x3c08c, 0xffffffff, 0x00040007,
1460 	0x3c090, 0xffffffff, 0x00060005,
1461 	0x3c094, 0xffffffff, 0x00090008,
1462 	0x3c098, 0xffffffff, 0x00010000,
1463 	0x3c09c, 0xffffffff, 0x00030002,
1464 	0x3c0a0, 0xffffffff, 0x00040007,
1465 	0x3c0a4, 0xffffffff, 0x00060005,
1466 	0x3c0a8, 0xffffffff, 0x00090008,
1467 	0x3c0ac, 0xffffffff, 0x00010000,
1468 	0x3c0b0, 0xffffffff, 0x00030002,
1469 	0x3c0b4, 0xffffffff, 0x00040007,
1470 	0x3c0b8, 0xffffffff, 0x00060005,
1471 	0x3c0bc, 0xffffffff, 0x00090008,
1472 	0x3c0c0, 0xffffffff, 0x00010000,
1473 	0x3c0c4, 0xffffffff, 0x00030002,
1474 	0x3c0c8, 0xffffffff, 0x00040007,
1475 	0x3c0cc, 0xffffffff, 0x00060005,
1476 	0x3c0d0, 0xffffffff, 0x00090008,
1477 	0x3c0d4, 0xffffffff, 0x00010000,
1478 	0x3c0d8, 0xffffffff, 0x00030002,
1479 	0x3c0dc, 0xffffffff, 0x00040007,
1480 	0x3c0e0, 0xffffffff, 0x00060005,
1481 	0x3c0e4, 0xffffffff, 0x00090008,
1482 	0x3c0e8, 0xffffffff, 0x00010000,
1483 	0x3c0ec, 0xffffffff, 0x00030002,
1484 	0x3c0f0, 0xffffffff, 0x00040007,
1485 	0x3c0f4, 0xffffffff, 0x00060005,
1486 	0x3c0f8, 0xffffffff, 0x00090008,
1487 	0xc318, 0xffffffff, 0x00020200,
1488 	0x3350, 0xffffffff, 0x00000200,
1489 	0x15c0, 0xffffffff, 0x00000400,
1490 	0x55e8, 0xffffffff, 0x00000000,
1491 	0x2f50, 0xffffffff, 0x00000902,
1492 	0x3c000, 0xffffffff, 0x96940200,
1493 	0x8708, 0xffffffff, 0x00900100,
1494 	0xc424, 0xffffffff, 0x0020003f,
1495 	0x38, 0xffffffff, 0x0140001c,
1496 	0x3c, 0x000f0000, 0x000f0000,
1497 	0x220, 0xffffffff, 0xc060000c,
1498 	0x224, 0xc0000fff, 0x00000100,
1499 	0xf90, 0xffffffff, 0x00000100,
1500 	0xf98, 0x00000101, 0x00000000,
1501 	0x20a8, 0xffffffff, 0x00000104,
1502 	0x55e4, 0xff000fff, 0x00000100,
1503 	0x30cc, 0xc0000fff, 0x00000104,
1504 	0xc1e4, 0x00000001, 0x00000001,
1505 	0xd00c, 0xff000ff0, 0x00000100,
1506 	0xd80c, 0xff000ff0, 0x00000100
1507 };
1508 
1509 static const u32 godavari_golden_registers[] =
1510 {
1511 	0x55e4, 0xff607fff, 0xfc000100,
1512 	0x6ed8, 0x00010101, 0x00010000,
1513 	0x9830, 0xffffffff, 0x00000000,
1514 	0x98302, 0xf00fffff, 0x00000400,
1515 	0x6130, 0xffffffff, 0x00010000,
1516 	0x5bb0, 0x000000f0, 0x00000070,
1517 	0x5bc0, 0xf0311fff, 0x80300000,
1518 	0x98f8, 0x73773777, 0x12010001,
1519 	0x98fc, 0xffffffff, 0x00000010,
1520 	0x8030, 0x00001f0f, 0x0000100a,
1521 	0x2f48, 0x73773777, 0x12010001,
1522 	0x2408, 0x000fffff, 0x000c007f,
1523 	0x8a14, 0xf000003f, 0x00000007,
1524 	0x8b24, 0xffffffff, 0x00ff0fff,
1525 	0x30a04, 0x0000ff0f, 0x00000000,
1526 	0x28a4c, 0x07ffffff, 0x06000000,
1527 	0x4d8, 0x00000fff, 0x00000100,
1528 	0xd014, 0x00010000, 0x00810001,
1529 	0xd814, 0x00010000, 0x00810001,
1530 	0x3e78, 0x00000001, 0x00000002,
1531 	0xc768, 0x00000008, 0x00000008,
1532 	0xc770, 0x00000f00, 0x00000800,
1533 	0xc774, 0x00000f00, 0x00000800,
1534 	0xc798, 0x00ffffff, 0x00ff7fbf,
1535 	0xc79c, 0x00ffffff, 0x00ff7faf,
1536 	0x8c00, 0x000000ff, 0x00000001,
1537 	0x214f8, 0x01ff01ff, 0x00000002,
1538 	0x21498, 0x007ff800, 0x00200000,
1539 	0x2015c, 0xffffffff, 0x00000f40,
1540 	0x88c4, 0x001f3ae3, 0x00000082,
1541 	0x88d4, 0x0000001f, 0x00000010,
1542 	0x30934, 0xffffffff, 0x00000000
1543 };
1544 
1545 
1546 static void cik_init_golden_registers(struct radeon_device *rdev)
1547 {
1548 	switch (rdev->family) {
1549 	case CHIP_BONAIRE:
1550 		radeon_program_register_sequence(rdev,
1551 						 bonaire_mgcg_cgcg_init,
1552 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1553 		radeon_program_register_sequence(rdev,
1554 						 bonaire_golden_registers,
1555 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1556 		radeon_program_register_sequence(rdev,
1557 						 bonaire_golden_common_registers,
1558 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1559 		radeon_program_register_sequence(rdev,
1560 						 bonaire_golden_spm_registers,
1561 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1562 		break;
1563 	case CHIP_KABINI:
1564 		radeon_program_register_sequence(rdev,
1565 						 kalindi_mgcg_cgcg_init,
1566 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1567 		radeon_program_register_sequence(rdev,
1568 						 kalindi_golden_registers,
1569 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1570 		radeon_program_register_sequence(rdev,
1571 						 kalindi_golden_common_registers,
1572 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1573 		radeon_program_register_sequence(rdev,
1574 						 kalindi_golden_spm_registers,
1575 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1576 		break;
1577 	case CHIP_MULLINS:
1578 		radeon_program_register_sequence(rdev,
1579 						 kalindi_mgcg_cgcg_init,
1580 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1581 		radeon_program_register_sequence(rdev,
1582 						 godavari_golden_registers,
1583 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1584 		radeon_program_register_sequence(rdev,
1585 						 kalindi_golden_common_registers,
1586 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1587 		radeon_program_register_sequence(rdev,
1588 						 kalindi_golden_spm_registers,
1589 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1590 		break;
1591 	case CHIP_KAVERI:
1592 		radeon_program_register_sequence(rdev,
1593 						 spectre_mgcg_cgcg_init,
1594 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1595 		radeon_program_register_sequence(rdev,
1596 						 spectre_golden_registers,
1597 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1598 		radeon_program_register_sequence(rdev,
1599 						 spectre_golden_common_registers,
1600 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1601 		radeon_program_register_sequence(rdev,
1602 						 spectre_golden_spm_registers,
1603 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1604 		break;
1605 	case CHIP_HAWAII:
1606 		radeon_program_register_sequence(rdev,
1607 						 hawaii_mgcg_cgcg_init,
1608 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1609 		radeon_program_register_sequence(rdev,
1610 						 hawaii_golden_registers,
1611 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1612 		radeon_program_register_sequence(rdev,
1613 						 hawaii_golden_common_registers,
1614 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1615 		radeon_program_register_sequence(rdev,
1616 						 hawaii_golden_spm_registers,
1617 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1618 		break;
1619 	default:
1620 		break;
1621 	}
1622 }
1623 
1624 /**
1625  * cik_get_xclk - get the xclk
1626  *
1627  * @rdev: radeon_device pointer
1628  *
1629  * Returns the reference clock used by the gfx engine
1630  * (CIK).
1631  */
1632 u32 cik_get_xclk(struct radeon_device *rdev)
1633 {
1634         u32 reference_clock = rdev->clock.spll.reference_freq;
1635 
1636 	if (rdev->flags & RADEON_IS_IGP) {
1637 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1638 			return reference_clock / 2;
1639 	} else {
1640 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1641 			return reference_clock / 4;
1642 	}
1643 	return reference_clock;
1644 }
1645 
1646 /**
1647  * cik_mm_rdoorbell - read a doorbell dword
1648  *
1649  * @rdev: radeon_device pointer
1650  * @index: doorbell index
1651  *
1652  * Returns the value in the doorbell aperture at the
1653  * requested doorbell index (CIK).
1654  */
1655 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1656 {
1657 	if (index < rdev->doorbell.num_doorbells) {
1658 		return readl(rdev->doorbell.ptr + index);
1659 	} else {
1660 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1661 		return 0;
1662 	}
1663 }
1664 
1665 /**
1666  * cik_mm_wdoorbell - write a doorbell dword
1667  *
1668  * @rdev: radeon_device pointer
1669  * @index: doorbell index
1670  * @v: value to write
1671  *
1672  * Writes @v to the doorbell aperture at the
1673  * requested doorbell index (CIK).
1674  */
1675 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1676 {
1677 	if (index < rdev->doorbell.num_doorbells) {
1678 		writel(v, rdev->doorbell.ptr + index);
1679 	} else {
1680 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1681 	}
1682 }
1683 
1684 #define BONAIRE_IO_MC_REGS_SIZE 36
1685 
1686 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1687 {
1688 	{0x00000070, 0x04400000},
1689 	{0x00000071, 0x80c01803},
1690 	{0x00000072, 0x00004004},
1691 	{0x00000073, 0x00000100},
1692 	{0x00000074, 0x00ff0000},
1693 	{0x00000075, 0x34000000},
1694 	{0x00000076, 0x08000014},
1695 	{0x00000077, 0x00cc08ec},
1696 	{0x00000078, 0x00000400},
1697 	{0x00000079, 0x00000000},
1698 	{0x0000007a, 0x04090000},
1699 	{0x0000007c, 0x00000000},
1700 	{0x0000007e, 0x4408a8e8},
1701 	{0x0000007f, 0x00000304},
1702 	{0x00000080, 0x00000000},
1703 	{0x00000082, 0x00000001},
1704 	{0x00000083, 0x00000002},
1705 	{0x00000084, 0xf3e4f400},
1706 	{0x00000085, 0x052024e3},
1707 	{0x00000087, 0x00000000},
1708 	{0x00000088, 0x01000000},
1709 	{0x0000008a, 0x1c0a0000},
1710 	{0x0000008b, 0xff010000},
1711 	{0x0000008d, 0xffffefff},
1712 	{0x0000008e, 0xfff3efff},
1713 	{0x0000008f, 0xfff3efbf},
1714 	{0x00000092, 0xf7ffffff},
1715 	{0x00000093, 0xffffff7f},
1716 	{0x00000095, 0x00101101},
1717 	{0x00000096, 0x00000fff},
1718 	{0x00000097, 0x00116fff},
1719 	{0x00000098, 0x60010000},
1720 	{0x00000099, 0x10010000},
1721 	{0x0000009a, 0x00006000},
1722 	{0x0000009b, 0x00001000},
1723 	{0x0000009f, 0x00b48000}
1724 };
1725 
1726 #define HAWAII_IO_MC_REGS_SIZE 22
1727 
1728 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1729 {
1730 	{0x0000007d, 0x40000000},
1731 	{0x0000007e, 0x40180304},
1732 	{0x0000007f, 0x0000ff00},
1733 	{0x00000081, 0x00000000},
1734 	{0x00000083, 0x00000800},
1735 	{0x00000086, 0x00000000},
1736 	{0x00000087, 0x00000100},
1737 	{0x00000088, 0x00020100},
1738 	{0x00000089, 0x00000000},
1739 	{0x0000008b, 0x00040000},
1740 	{0x0000008c, 0x00000100},
1741 	{0x0000008e, 0xff010000},
1742 	{0x00000090, 0xffffefff},
1743 	{0x00000091, 0xfff3efff},
1744 	{0x00000092, 0xfff3efbf},
1745 	{0x00000093, 0xf7ffffff},
1746 	{0x00000094, 0xffffff7f},
1747 	{0x00000095, 0x00000fff},
1748 	{0x00000096, 0x00116fff},
1749 	{0x00000097, 0x60010000},
1750 	{0x00000098, 0x10010000},
1751 	{0x0000009f, 0x00c79000}
1752 };
1753 
1754 
1755 /**
1756  * cik_srbm_select - select specific register instances
1757  *
1758  * @rdev: radeon_device pointer
1759  * @me: selected ME (micro engine)
1760  * @pipe: pipe
1761  * @queue: queue
1762  * @vmid: VMID
1763  *
1764  * Switches the currently active registers instances.  Some
1765  * registers are instanced per VMID, others are instanced per
1766  * me/pipe/queue combination.
1767  */
1768 static void cik_srbm_select(struct radeon_device *rdev,
1769 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1770 {
1771 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1772 			     MEID(me & 0x3) |
1773 			     VMID(vmid & 0xf) |
1774 			     QUEUEID(queue & 0x7));
1775 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1776 }
1777 
1778 /* ucode loading */
1779 /**
1780  * ci_mc_load_microcode - load MC ucode into the hw
1781  *
1782  * @rdev: radeon_device pointer
1783  *
1784  * Load the GDDR MC ucode into the hw (CIK).
1785  * Returns 0 on success, error on failure.
1786  */
1787 int ci_mc_load_microcode(struct radeon_device *rdev)
1788 {
1789 	const __be32 *fw_data = NULL;
1790 	const __le32 *new_fw_data = NULL;
1791 	u32 running, blackout = 0;
1792 	u32 *io_mc_regs = NULL;
1793 	const __le32 *new_io_mc_regs = NULL;
1794 	int i, regs_size, ucode_size;
1795 
1796 	if (!rdev->mc_fw)
1797 		return -EINVAL;
1798 
1799 	if (rdev->new_fw) {
1800 		const struct mc_firmware_header_v1_0 *hdr =
1801 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1802 
1803 		radeon_ucode_print_mc_hdr(&hdr->header);
1804 
1805 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1806 		new_io_mc_regs = (const __le32 *)
1807 			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1808 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1809 		new_fw_data = (const __le32 *)
1810 			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1811 	} else {
1812 		ucode_size = rdev->mc_fw->datasize / 4;
1813 
1814 		switch (rdev->family) {
1815 		case CHIP_BONAIRE:
1816 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1817 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1818 			break;
1819 		case CHIP_HAWAII:
1820 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1821 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1822 			break;
1823 		default:
1824 			return -EINVAL;
1825 		}
1826 		fw_data = (const __be32 *)rdev->mc_fw->data;
1827 	}
1828 
1829 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1830 
1831 	if (running == 0) {
1832 		if (running) {
1833 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1834 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1835 		}
1836 
1837 		/* reset the engine and set to writable */
1838 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1839 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1840 
1841 		/* load mc io regs */
1842 		for (i = 0; i < regs_size; i++) {
1843 			if (rdev->new_fw) {
1844 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1845 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1846 			} else {
1847 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1848 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1849 			}
1850 		}
1851 		/* load the MC ucode */
1852 		for (i = 0; i < ucode_size; i++) {
1853 			if (rdev->new_fw)
1854 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1855 			else
1856 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1857 		}
1858 
1859 		/* put the engine back into the active state */
1860 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1861 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1862 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1863 
1864 		/* wait for training to complete */
1865 		for (i = 0; i < rdev->usec_timeout; i++) {
1866 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1867 				break;
1868 			udelay(1);
1869 		}
1870 		for (i = 0; i < rdev->usec_timeout; i++) {
1871 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1872 				break;
1873 			udelay(1);
1874 		}
1875 
1876 		if (running)
1877 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1878 	}
1879 
1880 	return 0;
1881 }
1882 
1883 /**
1884  * cik_init_microcode - load ucode images from disk
1885  *
1886  * @rdev: radeon_device pointer
1887  *
1888  * Use the firmware interface to load the ucode images into
1889  * the driver (not loaded into hw).
1890  * Returns 0 on success, error on failure.
1891  */
1892 static int cik_init_microcode(struct radeon_device *rdev)
1893 {
1894 	const char *chip_name;
1895 	const char *new_chip_name;
1896 	size_t pfp_req_size, me_req_size, ce_req_size,
1897 		mec_req_size, rlc_req_size, mc_req_size = 0,
1898 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1899 	char fw_name[30];
1900 	int new_fw = 0;
1901 	int err;
1902 	int num_fw;
1903 
1904 	DRM_DEBUG("\n");
1905 
1906 	switch (rdev->family) {
1907 	case CHIP_BONAIRE:
1908 		chip_name = "BONAIRE";
1909 		new_chip_name = "bonaire";
1910 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1911 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1912 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1913 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1914 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1915 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1916 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1917 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1918 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1919 		num_fw = 8;
1920 		break;
1921 	case CHIP_HAWAII:
1922 		chip_name = "HAWAII";
1923 		new_chip_name = "hawaii";
1924 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1925 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1926 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1927 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1928 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1929 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1930 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1931 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1932 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1933 		num_fw = 8;
1934 		break;
1935 	case CHIP_KAVERI:
1936 		chip_name = "KAVERI";
1937 		new_chip_name = "kaveri";
1938 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1939 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1940 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1941 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1942 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1943 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1944 		num_fw = 7;
1945 		break;
1946 	case CHIP_KABINI:
1947 		chip_name = "KABINI";
1948 		new_chip_name = "kabini";
1949 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1950 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1951 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1952 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1953 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1954 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1955 		num_fw = 6;
1956 		break;
1957 	case CHIP_MULLINS:
1958 		chip_name = "MULLINS";
1959 		new_chip_name = "mullins";
1960 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1961 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1962 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1963 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1964 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1965 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1966 		num_fw = 6;
1967 		break;
1968 	default: BUG();
1969 	}
1970 
1971 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1972 
1973 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
1974 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1975 	if (err) {
1976 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1977 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1978 		if (err)
1979 			goto out;
1980 		if (rdev->pfp_fw->datasize != pfp_req_size) {
1981 			printk(KERN_ERR
1982 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1983 			       rdev->pfp_fw->datasize, fw_name);
1984 			err = -EINVAL;
1985 			goto out;
1986 		}
1987 	} else {
1988 		err = radeon_ucode_validate(rdev->pfp_fw);
1989 		if (err) {
1990 			printk(KERN_ERR
1991 			       "cik_fw: validation failed for firmware \"%s\"\n",
1992 			       fw_name);
1993 			goto out;
1994 		} else {
1995 			new_fw++;
1996 		}
1997 	}
1998 
1999 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
2000 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2001 	if (err) {
2002 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
2003 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2004 		if (err)
2005 			goto out;
2006 		if (rdev->me_fw->datasize != me_req_size) {
2007 			printk(KERN_ERR
2008 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2009 			       rdev->me_fw->datasize, fw_name);
2010 			err = -EINVAL;
2011 		}
2012 	} else {
2013 		err = radeon_ucode_validate(rdev->me_fw);
2014 		if (err) {
2015 			printk(KERN_ERR
2016 			       "cik_fw: validation failed for firmware \"%s\"\n",
2017 			       fw_name);
2018 			goto out;
2019 		} else {
2020 			new_fw++;
2021 		}
2022 	}
2023 
2024 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
2025 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2026 	if (err) {
2027 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
2028 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2029 		if (err)
2030 			goto out;
2031 		if (rdev->ce_fw->datasize != ce_req_size) {
2032 			printk(KERN_ERR
2033 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2034 			       rdev->ce_fw->datasize, fw_name);
2035 			err = -EINVAL;
2036 		}
2037 	} else {
2038 		err = radeon_ucode_validate(rdev->ce_fw);
2039 		if (err) {
2040 			printk(KERN_ERR
2041 			       "cik_fw: validation failed for firmware \"%s\"\n",
2042 			       fw_name);
2043 			goto out;
2044 		} else {
2045 			new_fw++;
2046 		}
2047 	}
2048 
2049 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", new_chip_name);
2050 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2051 	if (err) {
2052 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
2053 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2054 		if (err)
2055 			goto out;
2056 		if (rdev->mec_fw->datasize != mec_req_size) {
2057 			printk(KERN_ERR
2058 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2059 			       rdev->mec_fw->datasize, fw_name);
2060 			err = -EINVAL;
2061 		}
2062 	} else {
2063 		err = radeon_ucode_validate(rdev->mec_fw);
2064 		if (err) {
2065 			printk(KERN_ERR
2066 			       "cik_fw: validation failed for firmware \"%s\"\n",
2067 			       fw_name);
2068 			goto out;
2069 		} else {
2070 			new_fw++;
2071 		}
2072 	}
2073 
2074 	if (rdev->family == CHIP_KAVERI) {
2075 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec2", new_chip_name);
2076 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2077 		if (err) {
2078 			goto out;
2079 		} else {
2080 			err = radeon_ucode_validate(rdev->mec2_fw);
2081 			if (err) {
2082 				goto out;
2083 			} else {
2084 				new_fw++;
2085 			}
2086 		}
2087 	}
2088 
2089 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
2090 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2091 	if (err) {
2092 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
2093 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2094 		if (err)
2095 			goto out;
2096 		if (rdev->rlc_fw->datasize != rlc_req_size) {
2097 			printk(KERN_ERR
2098 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2099 			       rdev->rlc_fw->datasize, fw_name);
2100 			err = -EINVAL;
2101 		}
2102 	} else {
2103 		err = radeon_ucode_validate(rdev->rlc_fw);
2104 		if (err) {
2105 			printk(KERN_ERR
2106 			       "cik_fw: validation failed for firmware \"%s\"\n",
2107 			       fw_name);
2108 			goto out;
2109 		} else {
2110 			new_fw++;
2111 		}
2112 	}
2113 
2114 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", new_chip_name);
2115 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2116 	if (err) {
2117 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
2118 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2119 		if (err)
2120 			goto out;
2121 		if (rdev->sdma_fw->datasize != sdma_req_size) {
2122 			printk(KERN_ERR
2123 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2124 			       rdev->sdma_fw->datasize, fw_name);
2125 			err = -EINVAL;
2126 		}
2127 	} else {
2128 		err = radeon_ucode_validate(rdev->sdma_fw);
2129 		if (err) {
2130 			printk(KERN_ERR
2131 			       "cik_fw: validation failed for firmware \"%s\"\n",
2132 			       fw_name);
2133 			goto out;
2134 		} else {
2135 			new_fw++;
2136 		}
2137 	}
2138 
2139 	/* No SMC, MC ucode on APUs */
2140 	if (!(rdev->flags & RADEON_IS_IGP)) {
2141 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
2142 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2143 		if (err) {
2144 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
2145 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2146 			if (err) {
2147 				ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
2148 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2149 				if (err)
2150 					goto out;
2151 			}
2152 			if ((rdev->mc_fw->datasize != mc_req_size) &&
2153 			    (rdev->mc_fw->datasize != mc2_req_size)){
2154 				printk(KERN_ERR
2155 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2156 				       rdev->mc_fw->datasize, fw_name);
2157 				err = -EINVAL;
2158 			}
2159 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
2160 		} else {
2161 			err = radeon_ucode_validate(rdev->mc_fw);
2162 			if (err) {
2163 				printk(KERN_ERR
2164 				       "cik_fw: validation failed for firmware \"%s\"\n",
2165 				       fw_name);
2166 				goto out;
2167 			} else {
2168 				new_fw++;
2169 			}
2170 		}
2171 
2172 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
2173 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2174 		if (err) {
2175 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
2176 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2177 			if (err) {
2178 				printk(KERN_ERR
2179 				       "smc: error loading firmware \"%s\"\n",
2180 				       fw_name);
2181 				release_firmware(rdev->smc_fw);
2182 				rdev->smc_fw = NULL;
2183 				err = 0;
2184 			} else if (rdev->smc_fw->datasize != smc_req_size) {
2185 				printk(KERN_ERR
2186 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2187 				       rdev->smc_fw->datasize, fw_name);
2188 				err = -EINVAL;
2189 			}
2190 		} else {
2191 			err = radeon_ucode_validate(rdev->smc_fw);
2192 			if (err) {
2193 				printk(KERN_ERR
2194 				       "cik_fw: validation failed for firmware \"%s\"\n",
2195 				       fw_name);
2196 				goto out;
2197 			} else {
2198 				new_fw++;
2199 			}
2200 		}
2201 	}
2202 
2203 	if (new_fw == 0) {
2204 		rdev->new_fw = false;
2205 	} else if (new_fw < num_fw) {
2206 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2207 		err = -EINVAL;
2208 	} else {
2209 		rdev->new_fw = true;
2210 	}
2211 
2212 out:
2213 	if (err) {
2214 		if (err != -EINVAL)
2215 			printk(KERN_ERR
2216 			       "cik_cp: Failed to load firmware \"%s\"\n",
2217 			       fw_name);
2218 		release_firmware(rdev->pfp_fw);
2219 		rdev->pfp_fw = NULL;
2220 		release_firmware(rdev->me_fw);
2221 		rdev->me_fw = NULL;
2222 		release_firmware(rdev->ce_fw);
2223 		rdev->ce_fw = NULL;
2224 		release_firmware(rdev->mec_fw);
2225 		rdev->mec_fw = NULL;
2226 		release_firmware(rdev->mec2_fw);
2227 		rdev->mec2_fw = NULL;
2228 		release_firmware(rdev->rlc_fw);
2229 		rdev->rlc_fw = NULL;
2230 		release_firmware(rdev->sdma_fw);
2231 		rdev->sdma_fw = NULL;
2232 		release_firmware(rdev->mc_fw);
2233 		rdev->mc_fw = NULL;
2234 		release_firmware(rdev->smc_fw);
2235 		rdev->smc_fw = NULL;
2236 	}
2237 	return err;
2238 }
2239 
2240 /**
2241  * cik_fini_microcode - drop the firmwares image references
2242  *
2243  * @rdev: radeon_device pointer
2244  *
2245  * Drop the pfp, me, mec, mec2, rlc, sdma, mc, smc and ce firmware image references.
2246  * Called at driver shutdown.
2247  */
2248 static void cik_fini_microcode(struct radeon_device *rdev)
2249 {
2250 	release_firmware(rdev->pfp_fw);
2251 	rdev->pfp_fw = NULL;
2252 	release_firmware(rdev->me_fw);
2253 	rdev->me_fw = NULL;
2254 	release_firmware(rdev->ce_fw);
2255 	rdev->ce_fw = NULL;
2256 	release_firmware(rdev->mec_fw);
2257 	rdev->mec_fw = NULL;
2258 	release_firmware(rdev->mec2_fw);
2259 	rdev->mec2_fw = NULL;
2260 	release_firmware(rdev->rlc_fw);
2261 	rdev->rlc_fw = NULL;
2262 	release_firmware(rdev->sdma_fw);
2263 	rdev->sdma_fw = NULL;
2264 	release_firmware(rdev->mc_fw);
2265 	rdev->mc_fw = NULL;
2266 	release_firmware(rdev->smc_fw);
2267 	rdev->smc_fw = NULL;
2268 }
2269 
2270 /*
2271  * Core functions
2272  */
2273 /**
2274  * cik_tiling_mode_table_init - init the hw tiling table
2275  *
2276  * @rdev: radeon_device pointer
2277  *
2278  * Starting with SI, the tiling setup is done globally in a
2279  * set of 32 tiling modes.  Rather than selecting each set of
2280  * parameters per surface as on older asics, we just select
2281  * which index in the tiling table we want to use, and the
2282  * surface uses those parameters (CIK).
2283  */
2284 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2285 {
2286 	const u32 num_tile_mode_states = 32;
2287 	const u32 num_secondary_tile_mode_states = 16;
2288 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2289 	u32 num_pipe_configs;
2290 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2291 		rdev->config.cik.max_shader_engines;
2292 
2293 	switch (rdev->config.cik.mem_row_size_in_kb) {
2294 	case 1:
2295 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2296 		break;
2297 	case 2:
2298 	default:
2299 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2300 		break;
2301 	case 4:
2302 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2303 		break;
2304 	}
2305 
2306 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2307 	if (num_pipe_configs > 8)
2308 		num_pipe_configs = 16;
2309 
2310 	if (num_pipe_configs == 16) {
2311 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2312 			switch (reg_offset) {
2313 			case 0:
2314 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2316 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2318 				break;
2319 			case 1:
2320 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2322 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2324 				break;
2325 			case 2:
2326 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2328 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2330 				break;
2331 			case 3:
2332 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2333 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2334 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2335 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2336 				break;
2337 			case 4:
2338 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2339 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2340 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341 						 TILE_SPLIT(split_equal_to_row_size));
2342 				break;
2343 			case 5:
2344 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2345 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347 				break;
2348 			case 6:
2349 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2350 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2351 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2352 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2353 				break;
2354 			case 7:
2355 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2356 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2357 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358 						 TILE_SPLIT(split_equal_to_row_size));
2359 				break;
2360 			case 8:
2361 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2362 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2363 				break;
2364 			case 9:
2365 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2366 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2368 				break;
2369 			case 10:
2370 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2372 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2374 				break;
2375 			case 11:
2376 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2377 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2378 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2379 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2380 				break;
2381 			case 12:
2382 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2383 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2384 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2386 				break;
2387 			case 13:
2388 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2389 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2391 				break;
2392 			case 14:
2393 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2395 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2397 				break;
2398 			case 16:
2399 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2400 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2401 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2402 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2403 				break;
2404 			case 17:
2405 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2406 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2407 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2409 				break;
2410 			case 27:
2411 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2412 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2414 				break;
2415 			case 28:
2416 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2418 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420 				break;
2421 			case 29:
2422 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2423 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2424 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2425 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2426 				break;
2427 			case 30:
2428 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2429 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2430 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432 				break;
2433 			default:
2434 				gb_tile_moden = 0;
2435 				break;
2436 			}
2437 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2438 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2439 		}
2440 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2441 			switch (reg_offset) {
2442 			case 0:
2443 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2444 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2445 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2446 						 NUM_BANKS(ADDR_SURF_16_BANK));
2447 				break;
2448 			case 1:
2449 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452 						 NUM_BANKS(ADDR_SURF_16_BANK));
2453 				break;
2454 			case 2:
2455 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2456 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2457 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2458 						 NUM_BANKS(ADDR_SURF_16_BANK));
2459 				break;
2460 			case 3:
2461 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464 						 NUM_BANKS(ADDR_SURF_16_BANK));
2465 				break;
2466 			case 4:
2467 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2469 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2470 						 NUM_BANKS(ADDR_SURF_8_BANK));
2471 				break;
2472 			case 5:
2473 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476 						 NUM_BANKS(ADDR_SURF_4_BANK));
2477 				break;
2478 			case 6:
2479 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2481 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2482 						 NUM_BANKS(ADDR_SURF_2_BANK));
2483 				break;
2484 			case 8:
2485 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2487 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2488 						 NUM_BANKS(ADDR_SURF_16_BANK));
2489 				break;
2490 			case 9:
2491 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2493 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2494 						 NUM_BANKS(ADDR_SURF_16_BANK));
2495 				break;
2496 			case 10:
2497 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500 						 NUM_BANKS(ADDR_SURF_16_BANK));
2501 				break;
2502 			case 11:
2503 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2505 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2506 						 NUM_BANKS(ADDR_SURF_8_BANK));
2507 				break;
2508 			case 12:
2509 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2511 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2512 						 NUM_BANKS(ADDR_SURF_4_BANK));
2513 				break;
2514 			case 13:
2515 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2518 						 NUM_BANKS(ADDR_SURF_2_BANK));
2519 				break;
2520 			case 14:
2521 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524 						 NUM_BANKS(ADDR_SURF_2_BANK));
2525 				break;
2526 			default:
2527 				gb_tile_moden = 0;
2528 				break;
2529 			}
2530 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2531 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2532 		}
2533 	} else if (num_pipe_configs == 8) {
2534 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2535 			switch (reg_offset) {
2536 			case 0:
2537 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2538 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2539 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2541 				break;
2542 			case 1:
2543 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2544 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2545 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2547 				break;
2548 			case 2:
2549 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2551 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2553 				break;
2554 			case 3:
2555 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2556 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2557 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2559 				break;
2560 			case 4:
2561 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2563 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564 						 TILE_SPLIT(split_equal_to_row_size));
2565 				break;
2566 			case 5:
2567 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2568 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2570 				break;
2571 			case 6:
2572 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2574 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2576 				break;
2577 			case 7:
2578 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2579 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2580 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581 						 TILE_SPLIT(split_equal_to_row_size));
2582 				break;
2583 			case 8:
2584 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2585 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2586 				break;
2587 			case 9:
2588 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2589 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2591 				break;
2592 			case 10:
2593 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2595 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2597 				break;
2598 			case 11:
2599 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2600 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2601 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2602 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2603 				break;
2604 			case 12:
2605 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2606 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2607 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2608 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2609 				break;
2610 			case 13:
2611 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2612 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2613 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2614 				break;
2615 			case 14:
2616 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2617 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2618 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620 				break;
2621 			case 16:
2622 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2623 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2624 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2625 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2626 				break;
2627 			case 17:
2628 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2629 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2630 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2632 				break;
2633 			case 27:
2634 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2635 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2636 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2637 				break;
2638 			case 28:
2639 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2640 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2641 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2642 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643 				break;
2644 			case 29:
2645 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2646 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2647 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2648 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2649 				break;
2650 			case 30:
2651 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2652 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2653 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2654 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2655 				break;
2656 			default:
2657 				gb_tile_moden = 0;
2658 				break;
2659 			}
2660 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2661 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2662 		}
2663 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2664 			switch (reg_offset) {
2665 			case 0:
2666 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2668 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2669 						 NUM_BANKS(ADDR_SURF_16_BANK));
2670 				break;
2671 			case 1:
2672 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2673 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2674 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2675 						 NUM_BANKS(ADDR_SURF_16_BANK));
2676 				break;
2677 			case 2:
2678 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2681 						 NUM_BANKS(ADDR_SURF_16_BANK));
2682 				break;
2683 			case 3:
2684 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2685 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2686 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2687 						 NUM_BANKS(ADDR_SURF_16_BANK));
2688 				break;
2689 			case 4:
2690 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2692 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2693 						 NUM_BANKS(ADDR_SURF_8_BANK));
2694 				break;
2695 			case 5:
2696 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2698 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2699 						 NUM_BANKS(ADDR_SURF_4_BANK));
2700 				break;
2701 			case 6:
2702 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2704 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2705 						 NUM_BANKS(ADDR_SURF_2_BANK));
2706 				break;
2707 			case 8:
2708 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2710 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2711 						 NUM_BANKS(ADDR_SURF_16_BANK));
2712 				break;
2713 			case 9:
2714 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2716 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2717 						 NUM_BANKS(ADDR_SURF_16_BANK));
2718 				break;
2719 			case 10:
2720 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2722 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2723 						 NUM_BANKS(ADDR_SURF_16_BANK));
2724 				break;
2725 			case 11:
2726 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2729 						 NUM_BANKS(ADDR_SURF_16_BANK));
2730 				break;
2731 			case 12:
2732 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2734 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2735 						 NUM_BANKS(ADDR_SURF_8_BANK));
2736 				break;
2737 			case 13:
2738 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2741 						 NUM_BANKS(ADDR_SURF_4_BANK));
2742 				break;
2743 			case 14:
2744 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2746 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2747 						 NUM_BANKS(ADDR_SURF_2_BANK));
2748 				break;
2749 			default:
2750 				gb_tile_moden = 0;
2751 				break;
2752 			}
2753 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2754 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2755 		}
2756 	} else if (num_pipe_configs == 4) {
2757 		if (num_rbs == 4) {
2758 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2759 				switch (reg_offset) {
2760 				case 0:
2761 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2763 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2764 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2765 					break;
2766 				case 1:
2767 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2768 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2769 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2771 					break;
2772 				case 2:
2773 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2775 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2776 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2777 					break;
2778 				case 3:
2779 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2780 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2781 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2783 					break;
2784 				case 4:
2785 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2787 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2788 							 TILE_SPLIT(split_equal_to_row_size));
2789 					break;
2790 				case 5:
2791 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2792 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2794 					break;
2795 				case 6:
2796 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2800 					break;
2801 				case 7:
2802 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2803 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805 							 TILE_SPLIT(split_equal_to_row_size));
2806 					break;
2807 				case 8:
2808 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2809 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2810 					break;
2811 				case 9:
2812 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2813 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2814 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2815 					break;
2816 				case 10:
2817 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2818 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2819 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2820 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2821 					break;
2822 				case 11:
2823 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2824 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2825 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2826 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2827 					break;
2828 				case 12:
2829 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2830 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2831 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2832 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2833 					break;
2834 				case 13:
2835 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2836 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2837 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2838 					break;
2839 				case 14:
2840 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2841 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2842 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2843 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2844 					break;
2845 				case 16:
2846 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2847 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2848 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2849 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2850 					break;
2851 				case 17:
2852 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2853 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2854 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2855 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2856 					break;
2857 				case 27:
2858 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2859 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2860 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2861 					break;
2862 				case 28:
2863 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2864 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2865 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2866 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2867 					break;
2868 				case 29:
2869 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2870 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2871 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2872 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2873 					break;
2874 				case 30:
2875 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2876 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2877 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2878 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2879 					break;
2880 				default:
2881 					gb_tile_moden = 0;
2882 					break;
2883 				}
2884 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2885 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2886 			}
2887 		} else if (num_rbs < 4) {
2888 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2889 				switch (reg_offset) {
2890 				case 0:
2891 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2893 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2894 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2895 					break;
2896 				case 1:
2897 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2899 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2900 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2901 					break;
2902 				case 2:
2903 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2905 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2906 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2907 					break;
2908 				case 3:
2909 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2911 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2912 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2913 					break;
2914 				case 4:
2915 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2917 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2918 							 TILE_SPLIT(split_equal_to_row_size));
2919 					break;
2920 				case 5:
2921 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2923 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2924 					break;
2925 				case 6:
2926 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2927 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2928 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2929 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2930 					break;
2931 				case 7:
2932 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2933 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2934 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2935 							 TILE_SPLIT(split_equal_to_row_size));
2936 					break;
2937 				case 8:
2938 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2939 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2940 					break;
2941 				case 9:
2942 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2943 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2944 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2945 					break;
2946 				case 10:
2947 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2948 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2949 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2950 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951 					break;
2952 				case 11:
2953 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2955 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2956 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2957 					break;
2958 				case 12:
2959 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2960 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2961 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2962 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2963 					break;
2964 				case 13:
2965 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2966 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2967 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2968 					break;
2969 				case 14:
2970 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2971 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2972 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2973 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2974 					break;
2975 				case 16:
2976 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2977 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2978 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2979 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2980 					break;
2981 				case 17:
2982 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2983 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2984 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2985 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2986 					break;
2987 				case 27:
2988 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2989 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2990 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2991 					break;
2992 				case 28:
2993 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2994 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2995 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2996 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997 					break;
2998 				case 29:
2999 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3000 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3001 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3002 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3003 					break;
3004 				case 30:
3005 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3006 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3007 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3008 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3009 					break;
3010 				default:
3011 					gb_tile_moden = 0;
3012 					break;
3013 				}
3014 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3015 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3016 			}
3017 		}
3018 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3019 			switch (reg_offset) {
3020 			case 0:
3021 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3023 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3024 						 NUM_BANKS(ADDR_SURF_16_BANK));
3025 				break;
3026 			case 1:
3027 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3029 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3030 						 NUM_BANKS(ADDR_SURF_16_BANK));
3031 				break;
3032 			case 2:
3033 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3035 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3036 						 NUM_BANKS(ADDR_SURF_16_BANK));
3037 				break;
3038 			case 3:
3039 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3041 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3042 						 NUM_BANKS(ADDR_SURF_16_BANK));
3043 				break;
3044 			case 4:
3045 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3047 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3048 						 NUM_BANKS(ADDR_SURF_16_BANK));
3049 				break;
3050 			case 5:
3051 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3052 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3053 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3054 						 NUM_BANKS(ADDR_SURF_8_BANK));
3055 				break;
3056 			case 6:
3057 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3058 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3059 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3060 						 NUM_BANKS(ADDR_SURF_4_BANK));
3061 				break;
3062 			case 8:
3063 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3064 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3065 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3066 						 NUM_BANKS(ADDR_SURF_16_BANK));
3067 				break;
3068 			case 9:
3069 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3070 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3071 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3072 						 NUM_BANKS(ADDR_SURF_16_BANK));
3073 				break;
3074 			case 10:
3075 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3077 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3078 						 NUM_BANKS(ADDR_SURF_16_BANK));
3079 				break;
3080 			case 11:
3081 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3083 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3084 						 NUM_BANKS(ADDR_SURF_16_BANK));
3085 				break;
3086 			case 12:
3087 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3089 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3090 						 NUM_BANKS(ADDR_SURF_16_BANK));
3091 				break;
3092 			case 13:
3093 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3096 						 NUM_BANKS(ADDR_SURF_8_BANK));
3097 				break;
3098 			case 14:
3099 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3100 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3101 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3102 						 NUM_BANKS(ADDR_SURF_4_BANK));
3103 				break;
3104 			default:
3105 				gb_tile_moden = 0;
3106 				break;
3107 			}
3108 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3109 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3110 		}
3111 	} else if (num_pipe_configs == 2) {
3112 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3113 			switch (reg_offset) {
3114 			case 0:
3115 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3116 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3117 						 PIPE_CONFIG(ADDR_SURF_P2) |
3118 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3119 				break;
3120 			case 1:
3121 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3123 						 PIPE_CONFIG(ADDR_SURF_P2) |
3124 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3125 				break;
3126 			case 2:
3127 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3128 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3129 						 PIPE_CONFIG(ADDR_SURF_P2) |
3130 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3131 				break;
3132 			case 3:
3133 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3134 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3135 						 PIPE_CONFIG(ADDR_SURF_P2) |
3136 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3137 				break;
3138 			case 4:
3139 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3140 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3141 						 PIPE_CONFIG(ADDR_SURF_P2) |
3142 						 TILE_SPLIT(split_equal_to_row_size));
3143 				break;
3144 			case 5:
3145 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3146 						 PIPE_CONFIG(ADDR_SURF_P2) |
3147 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3148 				break;
3149 			case 6:
3150 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3151 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3152 						 PIPE_CONFIG(ADDR_SURF_P2) |
3153 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3154 				break;
3155 			case 7:
3156 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3157 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3158 						 PIPE_CONFIG(ADDR_SURF_P2) |
3159 						 TILE_SPLIT(split_equal_to_row_size));
3160 				break;
3161 			case 8:
3162 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3163 						PIPE_CONFIG(ADDR_SURF_P2);
3164 				break;
3165 			case 9:
3166 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3167 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3168 						 PIPE_CONFIG(ADDR_SURF_P2));
3169 				break;
3170 			case 10:
3171 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3172 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3173 						 PIPE_CONFIG(ADDR_SURF_P2) |
3174 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3175 				break;
3176 			case 11:
3177 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3178 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3179 						 PIPE_CONFIG(ADDR_SURF_P2) |
3180 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3181 				break;
3182 			case 12:
3183 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3184 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3185 						 PIPE_CONFIG(ADDR_SURF_P2) |
3186 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3187 				break;
3188 			case 13:
3189 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3190 						 PIPE_CONFIG(ADDR_SURF_P2) |
3191 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3192 				break;
3193 			case 14:
3194 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3195 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3196 						 PIPE_CONFIG(ADDR_SURF_P2) |
3197 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3198 				break;
3199 			case 16:
3200 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3201 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3202 						 PIPE_CONFIG(ADDR_SURF_P2) |
3203 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3204 				break;
3205 			case 17:
3206 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3207 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3208 						 PIPE_CONFIG(ADDR_SURF_P2) |
3209 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3210 				break;
3211 			case 27:
3212 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3213 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3214 						 PIPE_CONFIG(ADDR_SURF_P2));
3215 				break;
3216 			case 28:
3217 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3218 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3219 						 PIPE_CONFIG(ADDR_SURF_P2) |
3220 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3221 				break;
3222 			case 29:
3223 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3224 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3225 						 PIPE_CONFIG(ADDR_SURF_P2) |
3226 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3227 				break;
3228 			case 30:
3229 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3230 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3231 						 PIPE_CONFIG(ADDR_SURF_P2) |
3232 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3233 				break;
3234 			default:
3235 				gb_tile_moden = 0;
3236 				break;
3237 			}
3238 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3239 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3240 		}
3241 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3242 			switch (reg_offset) {
3243 			case 0:
3244 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3245 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3246 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3247 						 NUM_BANKS(ADDR_SURF_16_BANK));
3248 				break;
3249 			case 1:
3250 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3251 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3252 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3253 						 NUM_BANKS(ADDR_SURF_16_BANK));
3254 				break;
3255 			case 2:
3256 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3257 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3258 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3259 						 NUM_BANKS(ADDR_SURF_16_BANK));
3260 				break;
3261 			case 3:
3262 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3263 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3264 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3265 						 NUM_BANKS(ADDR_SURF_16_BANK));
3266 				break;
3267 			case 4:
3268 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3269 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3270 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3271 						 NUM_BANKS(ADDR_SURF_16_BANK));
3272 				break;
3273 			case 5:
3274 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3275 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3276 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3277 						 NUM_BANKS(ADDR_SURF_16_BANK));
3278 				break;
3279 			case 6:
3280 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3281 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3282 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3283 						 NUM_BANKS(ADDR_SURF_8_BANK));
3284 				break;
3285 			case 8:
3286 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3287 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3288 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3289 						 NUM_BANKS(ADDR_SURF_16_BANK));
3290 				break;
3291 			case 9:
3292 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3293 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3294 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3295 						 NUM_BANKS(ADDR_SURF_16_BANK));
3296 				break;
3297 			case 10:
3298 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3299 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3300 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3301 						 NUM_BANKS(ADDR_SURF_16_BANK));
3302 				break;
3303 			case 11:
3304 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3305 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3306 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 						 NUM_BANKS(ADDR_SURF_16_BANK));
3308 				break;
3309 			case 12:
3310 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3311 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3312 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3313 						 NUM_BANKS(ADDR_SURF_16_BANK));
3314 				break;
3315 			case 13:
3316 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3318 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319 						 NUM_BANKS(ADDR_SURF_16_BANK));
3320 				break;
3321 			case 14:
3322 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3325 						 NUM_BANKS(ADDR_SURF_8_BANK));
3326 				break;
3327 			default:
3328 				gb_tile_moden = 0;
3329 				break;
3330 			}
3331 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3332 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3333 		}
3334 	} else
3335 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3336 }
3337 
3338 /**
3339  * cik_select_se_sh - select which SE, SH to address
3340  *
3341  * @rdev: radeon_device pointer
3342  * @se_num: shader engine to address
3343  * @sh_num: sh block to address
3344  *
3345  * Select which SE, SH combinations to address. Certain
3346  * registers are instanced per SE or SH.  0xffffffff means
3347  * broadcast to all SEs or SHs (CIK).
3348  */
3349 static void cik_select_se_sh(struct radeon_device *rdev,
3350 			     u32 se_num, u32 sh_num)
3351 {
3352 	u32 data = INSTANCE_BROADCAST_WRITES;
3353 
3354 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3355 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3356 	else if (se_num == 0xffffffff)
3357 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3358 	else if (sh_num == 0xffffffff)
3359 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3360 	else
3361 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3362 	WREG32(GRBM_GFX_INDEX, data);
3363 }
3364 
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Create a variable length bit mask with the low @bit_width bits
 * set (CIK).  A width of 32 or more yields a mask with all 32 bits set.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* a shift by the full type width is undefined in C, so saturate */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
3383 
3384 /**
3385  * cik_get_rb_disabled - computes the mask of disabled RBs
3386  *
3387  * @rdev: radeon_device pointer
3388  * @max_rb_num: max RBs (render backends) for the asic
3389  * @se_num: number of SEs (shader engines) for the asic
3390  * @sh_per_se: number of SH blocks per SE for the asic
3391  *
3392  * Calculates the bitmask of disabled RBs (CIK).
3393  * Returns the disabled RB bitmask.
3394  */
3395 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3396 			      u32 max_rb_num_per_se,
3397 			      u32 sh_per_se)
3398 {
3399 	u32 data, mask;
3400 
3401 	data = RREG32(CC_RB_BACKEND_DISABLE);
3402 	if (data & 1)
3403 		data &= BACKEND_DISABLE_MASK;
3404 	else
3405 		data = 0;
3406 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3407 
3408 	data >>= BACKEND_DISABLE_SHIFT;
3409 
3410 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3411 
3412 	return data & mask;
3413 }
3414 
3415 /**
3416  * cik_setup_rb - setup the RBs on the asic
3417  *
3418  * @rdev: radeon_device pointer
3419  * @se_num: number of SEs (shader engines) for the asic
3420  * @sh_per_se: number of SH blocks per SE for the asic
3421  * @max_rb_num: max RBs (render backends) for the asic
3422  *
3423  * Configures per-SE/SH RB registers (CIK).
3424  */
3425 static void cik_setup_rb(struct radeon_device *rdev,
3426 			 u32 se_num, u32 sh_per_se,
3427 			 u32 max_rb_num_per_se)
3428 {
3429 	int i, j;
3430 	u32 data, mask;
3431 	u32 disabled_rbs = 0;
3432 	u32 enabled_rbs = 0;
3433 
3434 	for (i = 0; i < se_num; i++) {
3435 		for (j = 0; j < sh_per_se; j++) {
3436 			cik_select_se_sh(rdev, i, j);
3437 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3438 			if (rdev->family == CHIP_HAWAII)
3439 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3440 			else
3441 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3442 		}
3443 	}
3444 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3445 
3446 	mask = 1;
3447 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3448 		if (!(disabled_rbs & mask))
3449 			enabled_rbs |= mask;
3450 		mask <<= 1;
3451 	}
3452 
3453 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3454 
3455 	for (i = 0; i < se_num; i++) {
3456 		cik_select_se_sh(rdev, i, 0xffffffff);
3457 		data = 0;
3458 		for (j = 0; j < sh_per_se; j++) {
3459 			switch (enabled_rbs & 3) {
3460 			case 0:
3461 				if (j == 0)
3462 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3463 				else
3464 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3465 				break;
3466 			case 1:
3467 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3468 				break;
3469 			case 2:
3470 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3471 				break;
3472 			case 3:
3473 			default:
3474 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3475 				break;
3476 			}
3477 			enabled_rbs >>= 2;
3478 		}
3479 		WREG32(PA_SC_RASTER_CONFIG, data);
3480 	}
3481 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3482 }
3483 
3484 /**
3485  * cik_gpu_init - setup the 3D engine
3486  *
3487  * @rdev: radeon_device pointer
3488  *
3489  * Configures the 3D engine and tiling configuration
3490  * registers so that the 3D engine is usable.
3491  */
3492 static void cik_gpu_init(struct radeon_device *rdev)
3493 {
3494 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3495 	u32 mc_shared_chmap, mc_arb_ramcfg;
3496 	u32 hdp_host_path_cntl;
3497 	u32 tmp;
3498 	int i, j;
3499 
3500 	switch (rdev->family) {
3501 	case CHIP_BONAIRE:
3502 		rdev->config.cik.max_shader_engines = 2;
3503 		rdev->config.cik.max_tile_pipes = 4;
3504 		rdev->config.cik.max_cu_per_sh = 7;
3505 		rdev->config.cik.max_sh_per_se = 1;
3506 		rdev->config.cik.max_backends_per_se = 2;
3507 		rdev->config.cik.max_texture_channel_caches = 4;
3508 		rdev->config.cik.max_gprs = 256;
3509 		rdev->config.cik.max_gs_threads = 32;
3510 		rdev->config.cik.max_hw_contexts = 8;
3511 
3512 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3513 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3514 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3515 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3516 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3517 		break;
3518 	case CHIP_HAWAII:
3519 		rdev->config.cik.max_shader_engines = 4;
3520 		rdev->config.cik.max_tile_pipes = 16;
3521 		rdev->config.cik.max_cu_per_sh = 11;
3522 		rdev->config.cik.max_sh_per_se = 1;
3523 		rdev->config.cik.max_backends_per_se = 4;
3524 		rdev->config.cik.max_texture_channel_caches = 16;
3525 		rdev->config.cik.max_gprs = 256;
3526 		rdev->config.cik.max_gs_threads = 32;
3527 		rdev->config.cik.max_hw_contexts = 8;
3528 
3529 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3530 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3531 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3532 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3533 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3534 		break;
3535 	case CHIP_KAVERI:
3536 		rdev->config.cik.max_shader_engines = 1;
3537 		rdev->config.cik.max_tile_pipes = 4;
3538 		if ((rdev->pdev->device == 0x1304) ||
3539 		    (rdev->pdev->device == 0x1305) ||
3540 		    (rdev->pdev->device == 0x130C) ||
3541 		    (rdev->pdev->device == 0x130F) ||
3542 		    (rdev->pdev->device == 0x1310) ||
3543 		    (rdev->pdev->device == 0x1311) ||
3544 		    (rdev->pdev->device == 0x131C)) {
3545 			rdev->config.cik.max_cu_per_sh = 8;
3546 			rdev->config.cik.max_backends_per_se = 2;
3547 		} else if ((rdev->pdev->device == 0x1309) ||
3548 			   (rdev->pdev->device == 0x130A) ||
3549 			   (rdev->pdev->device == 0x130D) ||
3550 			   (rdev->pdev->device == 0x1313) ||
3551 			   (rdev->pdev->device == 0x131D)) {
3552 			rdev->config.cik.max_cu_per_sh = 6;
3553 			rdev->config.cik.max_backends_per_se = 2;
3554 		} else if ((rdev->pdev->device == 0x1306) ||
3555 			   (rdev->pdev->device == 0x1307) ||
3556 			   (rdev->pdev->device == 0x130B) ||
3557 			   (rdev->pdev->device == 0x130E) ||
3558 			   (rdev->pdev->device == 0x1315) ||
3559 			   (rdev->pdev->device == 0x1318) ||
3560 			   (rdev->pdev->device == 0x131B)) {
3561 			rdev->config.cik.max_cu_per_sh = 4;
3562 			rdev->config.cik.max_backends_per_se = 1;
3563 		} else {
3564 			rdev->config.cik.max_cu_per_sh = 3;
3565 			rdev->config.cik.max_backends_per_se = 1;
3566 		}
3567 		rdev->config.cik.max_sh_per_se = 1;
3568 		rdev->config.cik.max_texture_channel_caches = 4;
3569 		rdev->config.cik.max_gprs = 256;
3570 		rdev->config.cik.max_gs_threads = 16;
3571 		rdev->config.cik.max_hw_contexts = 8;
3572 
3573 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3574 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3575 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3576 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3577 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3578 		break;
3579 	case CHIP_KABINI:
3580 	case CHIP_MULLINS:
3581 	default:
3582 		rdev->config.cik.max_shader_engines = 1;
3583 		rdev->config.cik.max_tile_pipes = 2;
3584 		rdev->config.cik.max_cu_per_sh = 2;
3585 		rdev->config.cik.max_sh_per_se = 1;
3586 		rdev->config.cik.max_backends_per_se = 1;
3587 		rdev->config.cik.max_texture_channel_caches = 2;
3588 		rdev->config.cik.max_gprs = 256;
3589 		rdev->config.cik.max_gs_threads = 16;
3590 		rdev->config.cik.max_hw_contexts = 8;
3591 
3592 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3593 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3594 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3595 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3596 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3597 		break;
3598 	}
3599 
3600 	/* Initialize HDP */
3601 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3602 		WREG32((0x2c14 + j), 0x00000000);
3603 		WREG32((0x2c18 + j), 0x00000000);
3604 		WREG32((0x2c1c + j), 0x00000000);
3605 		WREG32((0x2c20 + j), 0x00000000);
3606 		WREG32((0x2c24 + j), 0x00000000);
3607 	}
3608 
3609 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3610 
3611 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3612 
3613 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3614 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3615 
3616 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3617 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3618 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3619 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3620 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3621 		rdev->config.cik.mem_row_size_in_kb = 4;
3622 	/* XXX use MC settings? */
3623 	rdev->config.cik.shader_engine_tile_size = 32;
3624 	rdev->config.cik.num_gpus = 1;
3625 	rdev->config.cik.multi_gpu_tile_size = 64;
3626 
3627 	/* fix up row size */
3628 	gb_addr_config &= ~ROW_SIZE_MASK;
3629 	switch (rdev->config.cik.mem_row_size_in_kb) {
3630 	case 1:
3631 	default:
3632 		gb_addr_config |= ROW_SIZE(0);
3633 		break;
3634 	case 2:
3635 		gb_addr_config |= ROW_SIZE(1);
3636 		break;
3637 	case 4:
3638 		gb_addr_config |= ROW_SIZE(2);
3639 		break;
3640 	}
3641 
3642 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3643 	 * not have bank info, so create a custom tiling dword.
3644 	 * bits 3:0   num_pipes
3645 	 * bits 7:4   num_banks
3646 	 * bits 11:8  group_size
3647 	 * bits 15:12 row_size
3648 	 */
3649 	rdev->config.cik.tile_config = 0;
3650 	switch (rdev->config.cik.num_tile_pipes) {
3651 	case 1:
3652 		rdev->config.cik.tile_config |= (0 << 0);
3653 		break;
3654 	case 2:
3655 		rdev->config.cik.tile_config |= (1 << 0);
3656 		break;
3657 	case 4:
3658 		rdev->config.cik.tile_config |= (2 << 0);
3659 		break;
3660 	case 8:
3661 	default:
3662 		/* XXX what about 12? */
3663 		rdev->config.cik.tile_config |= (3 << 0);
3664 		break;
3665 	}
3666 	rdev->config.cik.tile_config |=
3667 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3668 	rdev->config.cik.tile_config |=
3669 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3670 	rdev->config.cik.tile_config |=
3671 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3672 
3673 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3674 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3675 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3676 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3677 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3678 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3679 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3680 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3681 
3682 	cik_tiling_mode_table_init(rdev);
3683 
3684 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3685 		     rdev->config.cik.max_sh_per_se,
3686 		     rdev->config.cik.max_backends_per_se);
3687 
3688 	rdev->config.cik.active_cus = 0;
3689 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3690 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3691 			rdev->config.cik.active_cus +=
3692 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3693 		}
3694 	}
3695 
3696 	/* set HW defaults for 3D engine */
3697 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3698 
3699 	WREG32(SX_DEBUG_1, 0x20);
3700 
3701 	WREG32(TA_CNTL_AUX, 0x00010000);
3702 
3703 	tmp = RREG32(SPI_CONFIG_CNTL);
3704 	tmp |= 0x03000000;
3705 	WREG32(SPI_CONFIG_CNTL, tmp);
3706 
3707 	WREG32(SQ_CONFIG, 1);
3708 
3709 	WREG32(DB_DEBUG, 0);
3710 
3711 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3712 	tmp |= 0x00000400;
3713 	WREG32(DB_DEBUG2, tmp);
3714 
3715 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3716 	tmp |= 0x00020200;
3717 	WREG32(DB_DEBUG3, tmp);
3718 
3719 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3720 	tmp |= 0x00018208;
3721 	WREG32(CB_HW_CONTROL, tmp);
3722 
3723 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3724 
3725 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3726 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3727 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3728 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3729 
3730 	WREG32(VGT_NUM_INSTANCES, 1);
3731 
3732 	WREG32(CP_PERFMON_CNTL, 0);
3733 
3734 	WREG32(SQ_CONFIG, 0);
3735 
3736 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3737 					  FORCE_EOV_MAX_REZ_CNT(255)));
3738 
3739 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3740 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3741 
3742 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3743 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3744 
3745 	tmp = RREG32(HDP_MISC_CNTL);
3746 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3747 	WREG32(HDP_MISC_CNTL, tmp);
3748 
3749 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3750 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3751 
3752 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3753 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3754 
3755 	udelay(50);
3756 }
3757 
3758 /*
 * GPU scratch register helper functions.
3760  */
3761 /**
3762  * cik_scratch_init - setup driver info for CP scratch regs
3763  *
3764  * @rdev: radeon_device pointer
3765  *
3766  * Set up the number and offset of the CP scratch registers.
3767  * NOTE: use of CP scratch registers is a legacy inferface and
3768  * is not used by default on newer asics (r6xx+).  On newer asics,
3769  * memory buffers are used for fences rather than scratch regs.
3770  */
3771 static void cik_scratch_init(struct radeon_device *rdev)
3772 {
3773 	int i;
3774 
3775 	rdev->scratch.num_reg = 7;
3776 	rdev->scratch.reg_base = SCRATCH_REG0;
3777 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3778 		rdev->scratch.free[i] = true;
3779 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3780 	}
3781 }
3782 
3783 /**
3784  * cik_ring_test - basic gfx ring test
3785  *
3786  * @rdev: radeon_device pointer
3787  * @ring: radeon_ring structure holding ring information
3788  *
3789  * Allocate a scratch register and write to it using the gfx ring (CIK).
3790  * Provides a basic gfx ring test to verify that the ring is working.
3791  * Used by cik_cp_gfx_resume();
3792  * Returns 0 on success, error on failure.
3793  */
3794 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3795 {
3796 	uint32_t scratch;
3797 	uint32_t tmp = 0;
3798 	unsigned i;
3799 	int r;
3800 
3801 	r = radeon_scratch_get(rdev, &scratch);
3802 	if (r) {
3803 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3804 		return r;
3805 	}
3806 	WREG32(scratch, 0xCAFEDEAD);
3807 	r = radeon_ring_lock(rdev, ring, 3);
3808 	if (r) {
3809 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3810 		radeon_scratch_free(rdev, scratch);
3811 		return r;
3812 	}
3813 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3814 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3815 	radeon_ring_write(ring, 0xDEADBEEF);
3816 	radeon_ring_unlock_commit(rdev, ring, false);
3817 
3818 	for (i = 0; i < rdev->usec_timeout; i++) {
3819 		tmp = RREG32(scratch);
3820 		if (tmp == 0xDEADBEEF)
3821 			break;
3822 		DRM_UDELAY(1);
3823 	}
3824 	if (i < rdev->usec_timeout) {
3825 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3826 	} else {
3827 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3828 			  ring->idx, scratch, tmp);
3829 		r = -EINVAL;
3830 	}
3831 	radeon_scratch_free(rdev, scratch);
3832 	return r;
3833 }
3834 
3835 /**
3836  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3837  *
3838  * @rdev: radeon_device pointer
3839  * @ridx: radeon ring index
3840  *
3841  * Emits an hdp flush on the cp.
3842  */
3843 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3844 				       int ridx)
3845 {
3846 	struct radeon_ring *ring = &rdev->ring[ridx];
3847 	u32 ref_and_mask;
3848 
3849 	switch (ring->idx) {
3850 	case CAYMAN_RING_TYPE_CP1_INDEX:
3851 	case CAYMAN_RING_TYPE_CP2_INDEX:
3852 	default:
3853 		switch (ring->me) {
3854 		case 0:
3855 			ref_and_mask = CP2 << ring->pipe;
3856 			break;
3857 		case 1:
3858 			ref_and_mask = CP6 << ring->pipe;
3859 			break;
3860 		default:
3861 			return;
3862 		}
3863 		break;
3864 	case RADEON_RING_TYPE_GFX_INDEX:
3865 		ref_and_mask = CP0;
3866 		break;
3867 	}
3868 
3869 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3870 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3871 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3872 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3873 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3874 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3875 	radeon_ring_write(ring, ref_and_mask);
3876 	radeon_ring_write(ring, ref_and_mask);
3877 	radeon_ring_write(ring, 0x20); /* poll interval */
3878 }
3879 
3880 /**
3881  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3882  *
3883  * @rdev: radeon_device pointer
3884  * @fence: radeon fence object
3885  *
3886  * Emits a fence sequnce number on the gfx ring and flushes
3887  * GPU caches.
3888  */
3889 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3890 			     struct radeon_fence *fence)
3891 {
3892 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3893 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3894 
3895 	/* EVENT_WRITE_EOP - flush caches, send int */
3896 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3897 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3898 				 EOP_TC_ACTION_EN |
3899 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3900 				 EVENT_INDEX(5)));
3901 	radeon_ring_write(ring, addr & 0xfffffffc);
3902 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3903 	radeon_ring_write(ring, fence->seq);
3904 	radeon_ring_write(ring, 0);
3905 }
3906 
3907 /**
3908  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3909  *
3910  * @rdev: radeon_device pointer
3911  * @fence: radeon fence object
3912  *
3913  * Emits a fence sequnce number on the compute ring and flushes
3914  * GPU caches.
3915  */
3916 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3917 				 struct radeon_fence *fence)
3918 {
3919 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3920 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3921 
3922 	/* RELEASE_MEM - flush caches, send int */
3923 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3924 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3925 				 EOP_TC_ACTION_EN |
3926 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3927 				 EVENT_INDEX(5)));
3928 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3929 	radeon_ring_write(ring, addr & 0xfffffffc);
3930 	radeon_ring_write(ring, upper_32_bits(addr));
3931 	radeon_ring_write(ring, fence->seq);
3932 	radeon_ring_write(ring, 0);
3933 }
3934 
3935 /**
3936  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3937  *
3938  * @rdev: radeon_device pointer
3939  * @ring: radeon ring buffer object
3940  * @semaphore: radeon semaphore object
3941  * @emit_wait: Is this a sempahore wait?
3942  *
3943  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3944  * from running ahead of semaphore waits.
3945  */
3946 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3947 			     struct radeon_ring *ring,
3948 			     struct radeon_semaphore *semaphore,
3949 			     bool emit_wait)
3950 {
3951 	uint64_t addr = semaphore->gpu_addr;
3952 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3953 
3954 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3955 	radeon_ring_write(ring, lower_32_bits(addr));
3956 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3957 
3958 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3959 		/* Prevent the PFP from running ahead of the semaphore wait */
3960 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3961 		radeon_ring_write(ring, 0x0);
3962 	}
3963 
3964 	return true;
3965 }
3966 
3967 /**
3968  * cik_copy_cpdma - copy pages using the CP DMA engine
3969  *
3970  * @rdev: radeon_device pointer
3971  * @src_offset: src GPU address
3972  * @dst_offset: dst GPU address
3973  * @num_gpu_pages: number of GPU pages to xfer
3974  * @fence: radeon fence object
3975  *
3976  * Copy GPU paging using the CP DMA engine (CIK+).
3977  * Used by the radeon ttm implementation to move pages if
3978  * registered as the asic copy callback.
3979  */
3980 int cik_copy_cpdma(struct radeon_device *rdev,
3981 		   uint64_t src_offset, uint64_t dst_offset,
3982 		   unsigned num_gpu_pages,
3983 		   struct radeon_fence **fence)
3984 {
3985 	struct radeon_semaphore *sem = NULL;
3986 	int ring_index = rdev->asic->copy.blit_ring_index;
3987 	struct radeon_ring *ring = &rdev->ring[ring_index];
3988 	u32 size_in_bytes, cur_size_in_bytes, control;
3989 	int i, num_loops;
3990 	int r = 0;
3991 
3992 	r = radeon_semaphore_create(rdev, &sem);
3993 	if (r) {
3994 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3995 		return r;
3996 	}
3997 
3998 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3999 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4000 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4001 	if (r) {
4002 		DRM_ERROR("radeon: moving bo (%d).\n", r);
4003 		radeon_semaphore_free(rdev, &sem, NULL);
4004 		return r;
4005 	}
4006 
4007 	radeon_semaphore_sync_to(sem, *fence);
4008 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
4009 
4010 	for (i = 0; i < num_loops; i++) {
4011 		cur_size_in_bytes = size_in_bytes;
4012 		if (cur_size_in_bytes > 0x1fffff)
4013 			cur_size_in_bytes = 0x1fffff;
4014 		size_in_bytes -= cur_size_in_bytes;
4015 		control = 0;
4016 		if (size_in_bytes == 0)
4017 			control |= PACKET3_DMA_DATA_CP_SYNC;
4018 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4019 		radeon_ring_write(ring, control);
4020 		radeon_ring_write(ring, lower_32_bits(src_offset));
4021 		radeon_ring_write(ring, upper_32_bits(src_offset));
4022 		radeon_ring_write(ring, lower_32_bits(dst_offset));
4023 		radeon_ring_write(ring, upper_32_bits(dst_offset));
4024 		radeon_ring_write(ring, cur_size_in_bytes);
4025 		src_offset += cur_size_in_bytes;
4026 		dst_offset += cur_size_in_bytes;
4027 	}
4028 
4029 	r = radeon_fence_emit(rdev, fence, ring->idx);
4030 	if (r) {
4031 		radeon_ring_unlock_undo(rdev, ring);
4032 		radeon_semaphore_free(rdev, &sem, NULL);
4033 		return r;
4034 	}
4035 
4036 	radeon_ring_unlock_commit(rdev, ring, false);
4037 	radeon_semaphore_free(rdev, &sem, *fence);
4038 
4039 	return r;
4040 }
4041 
4042 /*
4043  * IB stuff
4044  */
4045 /**
4046  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4047  *
4048  * @rdev: radeon_device pointer
4049  * @ib: radeon indirect buffer object
4050  *
4051  * Emits an DE (drawing engine) or CE (constant engine) IB
4052  * on the gfx ring.  IBs are usually generated by userspace
4053  * acceleration drivers and submitted to the kernel for
4054  * sheduling on the ring.  This function schedules the IB
4055  * on the gfx ring for execution by the GPU.
4056  */
4057 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4058 {
4059 	struct radeon_ring *ring = &rdev->ring[ib->ring];
4060 	u32 header, control = INDIRECT_BUFFER_VALID;
4061 
4062 	if (ib->is_const_ib) {
4063 		/* set switch buffer packet before const IB */
4064 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4065 		radeon_ring_write(ring, 0);
4066 
4067 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4068 	} else {
4069 		u32 next_rptr;
4070 		if (ring->rptr_save_reg) {
4071 			next_rptr = ring->wptr + 3 + 4;
4072 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4073 			radeon_ring_write(ring, ((ring->rptr_save_reg -
4074 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
4075 			radeon_ring_write(ring, next_rptr);
4076 		} else if (rdev->wb.enabled) {
4077 			next_rptr = ring->wptr + 5 + 4;
4078 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4079 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4080 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4081 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4082 			radeon_ring_write(ring, next_rptr);
4083 		}
4084 
4085 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4086 	}
4087 
4088 	control |= ib->length_dw |
4089 		(ib->vm ? (ib->vm->id << 24) : 0);
4090 
4091 	radeon_ring_write(ring, header);
4092 	radeon_ring_write(ring,
4093 #ifdef __BIG_ENDIAN
4094 			  (2 << 0) |
4095 #endif
4096 			  (ib->gpu_addr & 0xFFFFFFFC));
4097 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4098 	radeon_ring_write(ring, control);
4099 }
4100 
4101 /**
4102  * cik_ib_test - basic gfx ring IB test
4103  *
4104  * @rdev: radeon_device pointer
4105  * @ring: radeon_ring structure holding ring information
4106  *
4107  * Allocate an IB and execute it on the gfx ring (CIK).
4108  * Provides a basic gfx ring test to verify that IBs are working.
4109  * Returns 0 on success, error on failure.
4110  */
4111 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4112 {
4113 	struct radeon_ib ib;
4114 	uint32_t scratch;
4115 	uint32_t tmp = 0;
4116 	unsigned i;
4117 	int r;
4118 
4119 	r = radeon_scratch_get(rdev, &scratch);
4120 	if (r) {
4121 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4122 		return r;
4123 	}
4124 	WREG32(scratch, 0xCAFEDEAD);
4125 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4126 	if (r) {
4127 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4128 		radeon_scratch_free(rdev, scratch);
4129 		return r;
4130 	}
4131 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4132 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4133 	ib.ptr[2] = 0xDEADBEEF;
4134 	ib.length_dw = 3;
4135 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4136 	if (r) {
4137 		radeon_scratch_free(rdev, scratch);
4138 		radeon_ib_free(rdev, &ib);
4139 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4140 		return r;
4141 	}
4142 	r = radeon_fence_wait(ib.fence, false);
4143 	if (r) {
4144 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4145 		radeon_scratch_free(rdev, scratch);
4146 		radeon_ib_free(rdev, &ib);
4147 		return r;
4148 	}
4149 	for (i = 0; i < rdev->usec_timeout; i++) {
4150 		tmp = RREG32(scratch);
4151 		if (tmp == 0xDEADBEEF)
4152 			break;
4153 		DRM_UDELAY(1);
4154 	}
4155 	if (i < rdev->usec_timeout) {
4156 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4157 	} else {
4158 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4159 			  scratch, tmp);
4160 		r = -EINVAL;
4161 	}
4162 	radeon_scratch_free(rdev, scratch);
4163 	radeon_ib_free(rdev, &ib);
4164 	return r;
4165 }
4166 
4167 /*
4168  * CP.
 * On CIK, gfx and compute now have independent command processors.
4170  *
4171  * GFX
4172  * Gfx consists of a single ring and can process both gfx jobs and
4173  * compute jobs.  The gfx CP consists of three microengines (ME):
4174  * PFP - Pre-Fetch Parser
4175  * ME - Micro Engine
4176  * CE - Constant Engine
4177  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
4179  * used by the DE so that they can be loaded into cache in parallel
4180  * while the DE is processing state update packets.
4181  *
4182  * Compute
4183  * The compute CP consists of two microengines (ME):
4184  * MEC1 - Compute MicroEngine 1
4185  * MEC2 - Compute MicroEngine 2
4186  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4187  * The queues are exposed to userspace and are programmed directly
4188  * by the compute runtime.
4189  */
4190 /**
4191  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4192  *
4193  * @rdev: radeon_device pointer
4194  * @enable: enable or disable the MEs
4195  *
4196  * Halts or unhalts the gfx MEs.
4197  */
4198 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4199 {
4200 	if (enable)
4201 		WREG32(CP_ME_CNTL, 0);
4202 	else {
4203 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4204 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4205 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4206 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4207 	}
4208 	udelay(50);
4209 }
4210 
4211 /**
4212  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4213  *
4214  * @rdev: radeon_device pointer
4215  *
4216  * Loads the gfx PFP, ME, and CE ucode.
4217  * Returns 0 for success, -EINVAL if the ucode is not available.
4218  */
4219 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4220 {
4221 	int i;
4222 
4223 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4224 		return -EINVAL;
4225 
4226 	cik_cp_gfx_enable(rdev, false);
4227 
4228 	if (rdev->new_fw) {
4229 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4230 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4231 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4232 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4233 		const struct gfx_firmware_header_v1_0 *me_hdr =
4234 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4235 		const __le32 *fw_data;
4236 		u32 fw_size;
4237 
4238 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4239 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4240 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4241 
4242 		/* PFP */
4243 		fw_data = (const __le32 *)
4244 			((const char *)rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4245 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4246 		WREG32(CP_PFP_UCODE_ADDR, 0);
4247 		for (i = 0; i < fw_size; i++)
4248 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4249 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4250 
4251 		/* CE */
4252 		fw_data = (const __le32 *)
4253 			((const char *)rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4254 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4255 		WREG32(CP_CE_UCODE_ADDR, 0);
4256 		for (i = 0; i < fw_size; i++)
4257 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4258 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4259 
4260 		/* ME */
4261 		fw_data = (const __be32 *)
4262 			((const char *)rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4263 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4264 		WREG32(CP_ME_RAM_WADDR, 0);
4265 		for (i = 0; i < fw_size; i++)
4266 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4267 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4268 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4269 	} else {
4270 		const __be32 *fw_data;
4271 
4272 		/* PFP */
4273 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4274 		WREG32(CP_PFP_UCODE_ADDR, 0);
4275 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4276 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4277 		WREG32(CP_PFP_UCODE_ADDR, 0);
4278 
4279 		/* CE */
4280 		fw_data = (const __be32 *)rdev->ce_fw->data;
4281 		WREG32(CP_CE_UCODE_ADDR, 0);
4282 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4283 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4284 		WREG32(CP_CE_UCODE_ADDR, 0);
4285 
4286 		/* ME */
4287 		fw_data = (const __be32 *)rdev->me_fw->data;
4288 		WREG32(CP_ME_RAM_WADDR, 0);
4289 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4290 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4291 		WREG32(CP_ME_RAM_WADDR, 0);
4292 	}
4293 
4294 	return 0;
4295 }
4296 
4297 /**
4298  * cik_cp_gfx_start - start the gfx ring
4299  *
4300  * @rdev: radeon_device pointer
4301  *
4302  * Enables the ring and loads the clear state context and other
4303  * packets required to init the ring.
4304  * Returns 0 for success, error for failure.
4305  */
4306 static int cik_cp_gfx_start(struct radeon_device *rdev)
4307 {
4308 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4309 	int r, i;
4310 
4311 	/* init the CP */
4312 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4313 	WREG32(CP_ENDIAN_SWAP, 0);
4314 	WREG32(CP_DEVICE_ID, 1);
4315 
4316 	cik_cp_gfx_enable(rdev, true);
4317 
4318 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4319 	if (r) {
4320 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4321 		return r;
4322 	}
4323 
4324 	/* init the CE partitions.  CE only used for gfx on CIK */
4325 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4326 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4327 	radeon_ring_write(ring, 0x8000);
4328 	radeon_ring_write(ring, 0x8000);
4329 
4330 	/* setup clear context state */
4331 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4332 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4333 
4334 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4335 	radeon_ring_write(ring, 0x80000000);
4336 	radeon_ring_write(ring, 0x80000000);
4337 
4338 	for (i = 0; i < cik_default_size; i++)
4339 		radeon_ring_write(ring, cik_default_state[i]);
4340 
4341 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4342 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4343 
4344 	/* set clear context state */
4345 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4346 	radeon_ring_write(ring, 0);
4347 
4348 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4349 	radeon_ring_write(ring, 0x00000316);
4350 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4351 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4352 
4353 	radeon_ring_unlock_commit(rdev, ring, false);
4354 
4355 	return 0;
4356 }
4357 
4358 /**
4359  * cik_cp_gfx_fini - stop the gfx ring
4360  *
4361  * @rdev: radeon_device pointer
4362  *
4363  * Stop the gfx ring and tear down the driver ring
4364  * info.
4365  */
4366 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4367 {
4368 	cik_cp_gfx_enable(rdev, false);
4369 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4370 }
4371 
4372 /**
4373  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4374  *
4375  * @rdev: radeon_device pointer
4376  *
4377  * Program the location and size of the gfx ring buffer
4378  * and test it to make sure it's working.
4379  * Returns 0 for success, error for failure.
4380  */
4381 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4382 {
4383 	struct radeon_ring *ring;
4384 	u32 tmp;
4385 	u32 rb_bufsz;
4386 	u64 rb_addr;
4387 	int r;
4388 
4389 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4390 	if (rdev->family != CHIP_HAWAII)
4391 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4392 
4393 	/* Set the write pointer delay */
4394 	WREG32(CP_RB_WPTR_DELAY, 0);
4395 
4396 	/* set the RB to use vmid 0 */
4397 	WREG32(CP_RB_VMID, 0);
4398 
4399 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4400 
4401 	/* ring 0 - compute and gfx */
4402 	/* Set ring buffer size */
4403 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4404 	rb_bufsz = order_base_2(ring->ring_size / 8);
4405 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4406 #ifdef __BIG_ENDIAN
4407 	tmp |= BUF_SWAP_32BIT;
4408 #endif
4409 	WREG32(CP_RB0_CNTL, tmp);
4410 
4411 	/* Initialize the ring buffer's read and write pointers */
4412 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4413 	ring->wptr = 0;
4414 	WREG32(CP_RB0_WPTR, ring->wptr);
4415 
4416 	/* set the wb address wether it's enabled or not */
4417 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4418 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4419 
4420 	/* scratch register shadowing is no longer supported */
4421 	WREG32(SCRATCH_UMSK, 0);
4422 
4423 	if (!rdev->wb.enabled)
4424 		tmp |= RB_NO_UPDATE;
4425 
4426 	mdelay(1);
4427 	WREG32(CP_RB0_CNTL, tmp);
4428 
4429 	rb_addr = ring->gpu_addr >> 8;
4430 	WREG32(CP_RB0_BASE, rb_addr);
4431 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4432 
4433 	/* start the ring */
4434 	cik_cp_gfx_start(rdev);
4435 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4436 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4437 	if (r) {
4438 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4439 		return r;
4440 	}
4441 
4442 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4443 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4444 
4445 	return 0;
4446 }
4447 
4448 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4449 		     struct radeon_ring *ring)
4450 {
4451 	u32 rptr;
4452 
4453 	if (rdev->wb.enabled)
4454 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4455 	else
4456 		rptr = RREG32(CP_RB0_RPTR);
4457 
4458 	return rptr;
4459 }
4460 
4461 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4462 		     struct radeon_ring *ring)
4463 {
4464 	u32 wptr;
4465 
4466 	wptr = RREG32(CP_RB0_WPTR);
4467 
4468 	return wptr;
4469 }
4470 
4471 void cik_gfx_set_wptr(struct radeon_device *rdev,
4472 		      struct radeon_ring *ring)
4473 {
4474 	WREG32(CP_RB0_WPTR, ring->wptr);
4475 	(void)RREG32(CP_RB0_WPTR);
4476 }
4477 
4478 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4479 			 struct radeon_ring *ring)
4480 {
4481 	u32 rptr;
4482 
4483 	if (rdev->wb.enabled) {
4484 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4485 	} else {
4486 		spin_lock(&rdev->srbm_mutex);
4487 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4488 		rptr = RREG32(CP_HQD_PQ_RPTR);
4489 		cik_srbm_select(rdev, 0, 0, 0, 0);
4490 		spin_unlock(&rdev->srbm_mutex);
4491 	}
4492 
4493 	return rptr;
4494 }
4495 
4496 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4497 			 struct radeon_ring *ring)
4498 {
4499 	u32 wptr;
4500 
4501 	if (rdev->wb.enabled) {
4502 		/* XXX check if swapping is necessary on BE */
4503 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4504 	} else {
4505 		spin_lock(&rdev->srbm_mutex);
4506 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4507 		wptr = RREG32(CP_HQD_PQ_WPTR);
4508 		cik_srbm_select(rdev, 0, 0, 0, 0);
4509 		spin_unlock(&rdev->srbm_mutex);
4510 	}
4511 
4512 	return wptr;
4513 }
4514 
4515 void cik_compute_set_wptr(struct radeon_device *rdev,
4516 			  struct radeon_ring *ring)
4517 {
4518 	/* XXX check if swapping is necessary on BE */
4519 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4520 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4521 }
4522 
4523 /**
4524  * cik_cp_compute_enable - enable/disable the compute CP MEs
4525  *
4526  * @rdev: radeon_device pointer
4527  * @enable: enable or disable the MEs
4528  *
4529  * Halts or unhalts the compute MEs.
4530  */
4531 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4532 {
4533 	if (enable)
4534 		WREG32(CP_MEC_CNTL, 0);
4535 	else {
4536 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4537 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4538 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4539 	}
4540 	udelay(50);
4541 }
4542 
4543 /**
4544  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4545  *
4546  * @rdev: radeon_device pointer
4547  *
4548  * Loads the compute MEC1&2 ucode.
4549  * Returns 0 for success, -EINVAL if the ucode is not available.
4550  */
4551 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4552 {
4553 	int i;
4554 
4555 	if (!rdev->mec_fw)
4556 		return -EINVAL;
4557 
4558 	cik_cp_compute_enable(rdev, false);
4559 
4560 	if (rdev->new_fw) {
4561 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4562 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4563 		const __le32 *fw_data;
4564 		u32 fw_size;
4565 
4566 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4567 
4568 		/* MEC1 */
4569 		fw_data = (const __le32 *)
4570 			((const char *)rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4571 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4572 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4573 		for (i = 0; i < fw_size; i++)
4574 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4575 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4576 
4577 		/* MEC2 */
4578 		if (rdev->family == CHIP_KAVERI) {
4579 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4580 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4581 
4582 			fw_data = (const __le32 *)
4583 				((const char *)rdev->mec2_fw->data +
4584 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4585 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4586 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4587 			for (i = 0; i < fw_size; i++)
4588 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4589 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4590 		}
4591 	} else {
4592 		const __be32 *fw_data;
4593 
4594 		/* MEC1 */
4595 		fw_data = (const __be32 *)rdev->mec_fw->data;
4596 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4597 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4598 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4599 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4600 
4601 		if (rdev->family == CHIP_KAVERI) {
4602 			/* MEC2 */
4603 			fw_data = (const __be32 *)rdev->mec_fw->data;
4604 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4605 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4606 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4607 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4608 		}
4609 	}
4610 
4611 	return 0;
4612 }
4613 
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues by calling cik_cp_compute_enable()
 * with enable set to true.
 * Currently always returns 0; the int return type lets callers
 * treat it like the other start/resume helpers.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	cik_cp_compute_enable(rdev, true);

	return 0;
}
4628 
4629 /**
4630  * cik_cp_compute_fini - stop the compute queues
4631  *
4632  * @rdev: radeon_device pointer
4633  *
4634  * Stop the compute queues and tear down the driver queue
4635  * info.
4636  */
4637 static void cik_cp_compute_fini(struct radeon_device *rdev)
4638 {
4639 	int i, idx, r;
4640 
4641 	cik_cp_compute_enable(rdev, false);
4642 
4643 	for (i = 0; i < 2; i++) {
4644 		if (i == 0)
4645 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4646 		else
4647 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4648 
4649 		if (rdev->ring[idx].mqd_obj) {
4650 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4651 			if (unlikely(r != 0))
4652 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4653 
4654 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4655 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4656 
4657 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4658 			rdev->ring[idx].mqd_obj = NULL;
4659 		}
4660 	}
4661 }
4662 
4663 static void cik_mec_fini(struct radeon_device *rdev)
4664 {
4665 	int r;
4666 
4667 	if (rdev->mec.hpd_eop_obj) {
4668 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4669 		if (unlikely(r != 0))
4670 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4671 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4672 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4673 
4674 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4675 		rdev->mec.hpd_eop_obj = NULL;
4676 	}
4677 }
4678 
4679 #define MEC_HPD_SIZE 2048
4680 
4681 static int cik_mec_init(struct radeon_device *rdev)
4682 {
4683 	int r;
4684 	u32 *hpd;
4685 
4686 	/*
4687 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4688 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4689 	 */
4690 	if (rdev->family == CHIP_KAVERI)
4691 		rdev->mec.num_mec = 2;
4692 	else
4693 		rdev->mec.num_mec = 1;
4694 	rdev->mec.num_pipe = 4;
4695 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4696 
4697 	if (rdev->mec.hpd_eop_obj == NULL) {
4698 		r = radeon_bo_create(rdev,
4699 				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4700 				     PAGE_SIZE, true,
4701 				     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4702 				     &rdev->mec.hpd_eop_obj);
4703 		if (r) {
4704 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4705 			return r;
4706 		}
4707 	}
4708 
4709 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4710 	if (unlikely(r != 0)) {
4711 		cik_mec_fini(rdev);
4712 		return r;
4713 	}
4714 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4715 			  &rdev->mec.hpd_eop_gpu_addr);
4716 	if (r) {
4717 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4718 		cik_mec_fini(rdev);
4719 		return r;
4720 	}
4721 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4722 	if (r) {
4723 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4724 		cik_mec_fini(rdev);
4725 		return r;
4726 	}
4727 
4728 	/* clear memory.  Not sure if this is required or not */
4729 	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4730 
4731 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4732 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4733 
4734 	return 0;
4735 }
4736 
/*
 * Per-queue register image, embedded in struct bonaire_mqd as queue_state.
 * cik_cp_compute_resume() stores in these fields the values it programs
 * into the correspondingly named CP_MQD_* / CP_HQD_* registers; fields it
 * does not touch stay at the zero written by its memset() of the MQD.
 * NOTE(review): the field order presumably has to match what the CP
 * expects to find in memory -- confirm before reordering or resizing.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4776 struct bonaire_mqd
4777 {
4778 	u32 header;
4779 	u32 dispatch_initiator;
4780 	u32 dimensions[3];
4781 	u32 start_idx[3];
4782 	u32 num_threads[3];
4783 	u32 pipeline_stat_enable;
4784 	u32 perf_counter_enable;
4785 	u32 pgm[2];
4786 	u32 tba[2];
4787 	u32 tma[2];
4788 	u32 pgm_rsrc[2];
4789 	u32 vmid;
4790 	u32 resource_limits;
4791 	u32 static_thread_mgmt01[2];
4792 	u32 tmp_ring_size;
4793 	u32 static_thread_mgmt23[2];
4794 	u32 restart[3];
4795 	u32 thread_trace_enable;
4796 	u32 reserved1;
4797 	u32 user_data[16];
4798 	u32 vgtcs_invoke_count[2];
4799 	struct hqd_registers queue_state;
4800 	u32 dequeue_cntr;
4801 	u32 interrupt_queue[64];
4802 };
4803 
4804 /**
4805  * cik_cp_compute_resume - setup the compute queue registers
4806  *
4807  * @rdev: radeon_device pointer
4808  *
4809  * Program the compute queues and test them to make sure they
4810  * are working.
4811  * Returns 0 for success, error for failure.
4812  */
4813 static int cik_cp_compute_resume(struct radeon_device *rdev)
4814 {
4815 	int r, i, j, idx;
4816 	u32 tmp;
4817 	bool use_doorbell = true;
4818 	u64 hqd_gpu_addr;
4819 	u64 mqd_gpu_addr;
4820 	u64 eop_gpu_addr;
4821 	u64 wb_gpu_addr;
4822 	u32 *buf;
4823 	struct bonaire_mqd *mqd;
4824 
4825 	r = cik_cp_compute_start(rdev);
4826 	if (r)
4827 		return r;
4828 
4829 	/* fix up chicken bits */
4830 	tmp = RREG32(CP_CPF_DEBUG);
4831 	tmp |= (1 << 23);
4832 	WREG32(CP_CPF_DEBUG, tmp);
4833 
4834 	/* init the pipes */
4835 	spin_lock(&rdev->srbm_mutex);
4836 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4837 		int me = (i < 4) ? 1 : 2;
4838 		int pipe = (i < 4) ? i : (i - 4);
4839 
4840 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4841 
4842 		cik_srbm_select(rdev, me, pipe, 0, 0);
4843 
4844 		/* write the EOP addr */
4845 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4846 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4847 
4848 		/* set the VMID assigned */
4849 		WREG32(CP_HPD_EOP_VMID, 0);
4850 
4851 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4852 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4853 		tmp &= ~EOP_SIZE_MASK;
4854 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4855 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4856 	}
4857 	cik_srbm_select(rdev, 0, 0, 0, 0);
4858 	spin_unlock(&rdev->srbm_mutex);
4859 
4860 	/* init the queues.  Just two for now. */
4861 	for (i = 0; i < 2; i++) {
4862 		if (i == 0)
4863 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4864 		else
4865 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4866 
4867 		if (rdev->ring[idx].mqd_obj == NULL) {
4868 			r = radeon_bo_create(rdev,
4869 					     sizeof(struct bonaire_mqd),
4870 					     PAGE_SIZE, true,
4871 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4872 					     &rdev->ring[idx].mqd_obj);
4873 			if (r) {
4874 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4875 				return r;
4876 			}
4877 		}
4878 
4879 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4880 		if (unlikely(r != 0)) {
4881 			cik_cp_compute_fini(rdev);
4882 			return r;
4883 		}
4884 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4885 				  &mqd_gpu_addr);
4886 		if (r) {
4887 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4888 			cik_cp_compute_fini(rdev);
4889 			return r;
4890 		}
4891 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4892 		if (r) {
4893 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4894 			cik_cp_compute_fini(rdev);
4895 			return r;
4896 		}
4897 
4898 		/* init the mqd struct */
4899 		memset(buf, 0, sizeof(struct bonaire_mqd));
4900 
4901 		mqd = (struct bonaire_mqd *)buf;
4902 		mqd->header = 0xC0310800;
4903 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4904 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4905 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4906 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4907 
4908 		spin_lock(&rdev->srbm_mutex);
4909 		cik_srbm_select(rdev, rdev->ring[idx].me,
4910 				rdev->ring[idx].pipe,
4911 				rdev->ring[idx].queue, 0);
4912 
4913 		/* disable wptr polling */
4914 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4915 		tmp &= ~WPTR_POLL_EN;
4916 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4917 
4918 		/* enable doorbell? */
4919 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4920 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4921 		if (use_doorbell)
4922 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4923 		else
4924 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4925 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4926 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4927 
4928 		/* disable the queue if it's active */
4929 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4930 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4931 		mqd->queue_state.cp_hqd_pq_wptr= 0;
4932 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4933 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4934 			for (j = 0; j < rdev->usec_timeout; j++) {
4935 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4936 					break;
4937 				udelay(1);
4938 			}
4939 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4940 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4941 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4942 		}
4943 
4944 		/* set the pointer to the MQD */
4945 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4946 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4947 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4948 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4949 		/* set MQD vmid to 0 */
4950 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4951 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4952 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4953 
4954 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4955 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4956 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4957 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4958 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4959 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4960 
4961 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4962 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4963 		mqd->queue_state.cp_hqd_pq_control &=
4964 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4965 
4966 		mqd->queue_state.cp_hqd_pq_control |=
4967 			order_base_2(rdev->ring[idx].ring_size / 8);
4968 		mqd->queue_state.cp_hqd_pq_control |=
4969 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4970 #ifdef __BIG_ENDIAN
4971 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4972 #endif
4973 		mqd->queue_state.cp_hqd_pq_control &=
4974 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4975 		mqd->queue_state.cp_hqd_pq_control |=
4976 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4977 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4978 
4979 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4980 		if (i == 0)
4981 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4982 		else
4983 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4984 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4985 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4986 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4987 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4988 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4989 
4990 		/* set the wb address wether it's enabled or not */
4991 		if (i == 0)
4992 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4993 		else
4994 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4995 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4996 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4997 			upper_32_bits(wb_gpu_addr) & 0xffff;
4998 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4999 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5000 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5001 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5002 
5003 		/* enable the doorbell if requested */
5004 		if (use_doorbell) {
5005 			mqd->queue_state.cp_hqd_pq_doorbell_control =
5006 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5007 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5008 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
5009 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5010 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5011 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
5012 				~(DOORBELL_SOURCE | DOORBELL_HIT);
5013 
5014 		} else {
5015 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5016 		}
5017 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5018 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5019 
5020 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5021 		rdev->ring[idx].wptr = 0;
5022 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5023 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5024 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5025 
5026 		/* set the vmid for the queue */
5027 		mqd->queue_state.cp_hqd_vmid = 0;
5028 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5029 
5030 		/* activate the queue */
5031 		mqd->queue_state.cp_hqd_active = 1;
5032 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5033 
5034 		cik_srbm_select(rdev, 0, 0, 0, 0);
5035 		spin_unlock(&rdev->srbm_mutex);
5036 
5037 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5038 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5039 
5040 		rdev->ring[idx].ready = true;
5041 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5042 		if (r)
5043 			rdev->ring[idx].ready = false;
5044 	}
5045 
5046 	return 0;
5047 }
5048 
/*
 * Enable or disable both command processors: the gfx CP first,
 * then the compute CP, each with the same @enable value.
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
5054 
/*
 * Load the gfx CP microcode and, if that succeeded, the compute (MEC)
 * microcode.  Returns 0 on success or the first error encountered.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int err = cik_cp_gfx_load_microcode(rdev);

	if (err == 0)
		err = cik_cp_compute_load_microcode(rdev);

	return err;
}
5068 
/*
 * Tear down both command processors: the gfx CP first,
 * then the compute CP.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
5074 
5075 static int cik_cp_resume(struct radeon_device *rdev)
5076 {
5077 	int r;
5078 
5079 	cik_enable_gui_idle_interrupt(rdev, false);
5080 
5081 	r = cik_cp_load_microcode(rdev);
5082 	if (r)
5083 		return r;
5084 
5085 	r = cik_cp_gfx_resume(rdev);
5086 	if (r)
5087 		return r;
5088 	r = cik_cp_compute_resume(rdev);
5089 	if (r)
5090 		return r;
5091 
5092 	cik_enable_gui_idle_interrupt(rdev, true);
5093 
5094 	return 0;
5095 }
5096 
5097 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5098 {
5099 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5100 		RREG32(GRBM_STATUS));
5101 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5102 		RREG32(GRBM_STATUS2));
5103 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5104 		RREG32(GRBM_STATUS_SE0));
5105 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5106 		RREG32(GRBM_STATUS_SE1));
5107 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5108 		RREG32(GRBM_STATUS_SE2));
5109 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5110 		RREG32(GRBM_STATUS_SE3));
5111 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5112 		RREG32(SRBM_STATUS));
5113 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5114 		RREG32(SRBM_STATUS2));
5115 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5116 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5117 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5118 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5119 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5120 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5121 		 RREG32(CP_STALLED_STAT1));
5122 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5123 		 RREG32(CP_STALLED_STAT2));
5124 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5125 		 RREG32(CP_STALLED_STAT3));
5126 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5127 		 RREG32(CP_CPF_BUSY_STAT));
5128 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5129 		 RREG32(CP_CPF_STALLED_STAT1));
5130 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5131 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5132 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5133 		 RREG32(CP_CPC_STALLED_STAT1));
5134 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5135 }
5136 
5137 /**
5138  * cik_gpu_check_soft_reset - check which blocks are busy
5139  *
5140  * @rdev: radeon_device pointer
5141  *
5142  * Check which blocks are busy and return the relevant reset
5143  * mask to be used by cik_gpu_soft_reset().
5144  * Returns a mask of the blocks to be reset.
5145  */
5146 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5147 {
5148 	u32 reset_mask = 0;
5149 	u32 tmp;
5150 
5151 	/* GRBM_STATUS */
5152 	tmp = RREG32(GRBM_STATUS);
5153 	if (tmp & (PA_BUSY | SC_BUSY |
5154 		   BCI_BUSY | SX_BUSY |
5155 		   TA_BUSY | VGT_BUSY |
5156 		   DB_BUSY | CB_BUSY |
5157 		   GDS_BUSY | SPI_BUSY |
5158 		   IA_BUSY | IA_BUSY_NO_DMA))
5159 		reset_mask |= RADEON_RESET_GFX;
5160 
5161 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5162 		reset_mask |= RADEON_RESET_CP;
5163 
5164 	/* GRBM_STATUS2 */
5165 	tmp = RREG32(GRBM_STATUS2);
5166 	if (tmp & RLC_BUSY)
5167 		reset_mask |= RADEON_RESET_RLC;
5168 
5169 	/* SDMA0_STATUS_REG */
5170 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5171 	if (!(tmp & SDMA_IDLE))
5172 		reset_mask |= RADEON_RESET_DMA;
5173 
5174 	/* SDMA1_STATUS_REG */
5175 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5176 	if (!(tmp & SDMA_IDLE))
5177 		reset_mask |= RADEON_RESET_DMA1;
5178 
5179 	/* SRBM_STATUS2 */
5180 	tmp = RREG32(SRBM_STATUS2);
5181 	if (tmp & SDMA_BUSY)
5182 		reset_mask |= RADEON_RESET_DMA;
5183 
5184 	if (tmp & SDMA1_BUSY)
5185 		reset_mask |= RADEON_RESET_DMA1;
5186 
5187 	/* SRBM_STATUS */
5188 	tmp = RREG32(SRBM_STATUS);
5189 
5190 	if (tmp & IH_BUSY)
5191 		reset_mask |= RADEON_RESET_IH;
5192 
5193 	if (tmp & SEM_BUSY)
5194 		reset_mask |= RADEON_RESET_SEM;
5195 
5196 	if (tmp & GRBM_RQ_PENDING)
5197 		reset_mask |= RADEON_RESET_GRBM;
5198 
5199 	if (tmp & VMC_BUSY)
5200 		reset_mask |= RADEON_RESET_VMC;
5201 
5202 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5203 		   MCC_BUSY | MCD_BUSY))
5204 		reset_mask |= RADEON_RESET_MC;
5205 
5206 	if (evergreen_is_display_hung(rdev))
5207 		reset_mask |= RADEON_RESET_DISPLAY;
5208 
5209 	/* Skip MC reset as it's mostly likely not hung, just busy */
5210 	if (reset_mask & RADEON_RESET_MC) {
5211 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5212 		reset_mask &= ~RADEON_RESET_MC;
5213 	}
5214 
5215 	return reset_mask;
5216 }
5217 
5218 /**
5219  * cik_gpu_soft_reset - soft reset GPU
5220  *
5221  * @rdev: radeon_device pointer
5222  * @reset_mask: mask of which blocks to reset
5223  *
5224  * Soft reset the blocks specified in @reset_mask.
5225  */
5226 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5227 {
5228 	struct evergreen_mc_save save;
5229 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5230 	u32 tmp;
5231 
5232 	if (reset_mask == 0)
5233 		return;
5234 
5235 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5236 
5237 	cik_print_gpu_status_regs(rdev);
5238 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5239 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5240 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5241 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5242 
5243 	/* disable CG/PG */
5244 	cik_fini_pg(rdev);
5245 	cik_fini_cg(rdev);
5246 
5247 	/* stop the rlc */
5248 	cik_rlc_stop(rdev);
5249 
5250 	/* Disable GFX parsing/prefetching */
5251 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5252 
5253 	/* Disable MEC parsing/prefetching */
5254 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5255 
5256 	if (reset_mask & RADEON_RESET_DMA) {
5257 		/* sdma0 */
5258 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5259 		tmp |= SDMA_HALT;
5260 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5261 	}
5262 	if (reset_mask & RADEON_RESET_DMA1) {
5263 		/* sdma1 */
5264 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5265 		tmp |= SDMA_HALT;
5266 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5267 	}
5268 
5269 	evergreen_mc_stop(rdev, &save);
5270 	if (evergreen_mc_wait_for_idle(rdev)) {
5271 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5272 	}
5273 
5274 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5275 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5276 
5277 	if (reset_mask & RADEON_RESET_CP) {
5278 		grbm_soft_reset |= SOFT_RESET_CP;
5279 
5280 		srbm_soft_reset |= SOFT_RESET_GRBM;
5281 	}
5282 
5283 	if (reset_mask & RADEON_RESET_DMA)
5284 		srbm_soft_reset |= SOFT_RESET_SDMA;
5285 
5286 	if (reset_mask & RADEON_RESET_DMA1)
5287 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5288 
5289 	if (reset_mask & RADEON_RESET_DISPLAY)
5290 		srbm_soft_reset |= SOFT_RESET_DC;
5291 
5292 	if (reset_mask & RADEON_RESET_RLC)
5293 		grbm_soft_reset |= SOFT_RESET_RLC;
5294 
5295 	if (reset_mask & RADEON_RESET_SEM)
5296 		srbm_soft_reset |= SOFT_RESET_SEM;
5297 
5298 	if (reset_mask & RADEON_RESET_IH)
5299 		srbm_soft_reset |= SOFT_RESET_IH;
5300 
5301 	if (reset_mask & RADEON_RESET_GRBM)
5302 		srbm_soft_reset |= SOFT_RESET_GRBM;
5303 
5304 	if (reset_mask & RADEON_RESET_VMC)
5305 		srbm_soft_reset |= SOFT_RESET_VMC;
5306 
5307 	if (!(rdev->flags & RADEON_IS_IGP)) {
5308 		if (reset_mask & RADEON_RESET_MC)
5309 			srbm_soft_reset |= SOFT_RESET_MC;
5310 	}
5311 
5312 	if (grbm_soft_reset) {
5313 		tmp = RREG32(GRBM_SOFT_RESET);
5314 		tmp |= grbm_soft_reset;
5315 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5316 		WREG32(GRBM_SOFT_RESET, tmp);
5317 		tmp = RREG32(GRBM_SOFT_RESET);
5318 
5319 		udelay(50);
5320 
5321 		tmp &= ~grbm_soft_reset;
5322 		WREG32(GRBM_SOFT_RESET, tmp);
5323 		tmp = RREG32(GRBM_SOFT_RESET);
5324 	}
5325 
5326 	if (srbm_soft_reset) {
5327 		tmp = RREG32(SRBM_SOFT_RESET);
5328 		tmp |= srbm_soft_reset;
5329 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5330 		WREG32(SRBM_SOFT_RESET, tmp);
5331 		tmp = RREG32(SRBM_SOFT_RESET);
5332 
5333 		udelay(50);
5334 
5335 		tmp &= ~srbm_soft_reset;
5336 		WREG32(SRBM_SOFT_RESET, tmp);
5337 		tmp = RREG32(SRBM_SOFT_RESET);
5338 	}
5339 
5340 	/* Wait a little for things to settle down */
5341 	udelay(50);
5342 
5343 	evergreen_mc_resume(rdev, &save);
5344 	udelay(50);
5345 
5346 	cik_print_gpu_status_regs(rdev);
5347 }
5348 
/*
 * GMCON register values captured by kv_save_regs_for_reset() and
 * written back by kv_restore_regs_for_reset() around a pci config
 * reset on IGP parts.
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* GMCON_MISC */
	u32 gmcon_misc3;	/* GMCON_MISC3 */
};
5354 
5355 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5356 				   struct kv_reset_save_regs *save)
5357 {
5358 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5359 	save->gmcon_misc = RREG32(GMCON_MISC);
5360 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5361 
5362 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5363 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5364 						STCTRL_STUTTER_EN));
5365 }
5366 
5367 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5368 				      struct kv_reset_save_regs *save)
5369 {
5370 	int i;
5371 
5372 	WREG32(GMCON_PGFSM_WRITE, 0);
5373 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5374 
5375 	for (i = 0; i < 5; i++)
5376 		WREG32(GMCON_PGFSM_WRITE, 0);
5377 
5378 	WREG32(GMCON_PGFSM_WRITE, 0);
5379 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5380 
5381 	for (i = 0; i < 5; i++)
5382 		WREG32(GMCON_PGFSM_WRITE, 0);
5383 
5384 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5385 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5386 
5387 	for (i = 0; i < 5; i++)
5388 		WREG32(GMCON_PGFSM_WRITE, 0);
5389 
5390 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5391 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5392 
5393 	for (i = 0; i < 5; i++)
5394 		WREG32(GMCON_PGFSM_WRITE, 0);
5395 
5396 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5397 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5398 
5399 	for (i = 0; i < 5; i++)
5400 		WREG32(GMCON_PGFSM_WRITE, 0);
5401 
5402 	WREG32(GMCON_PGFSM_WRITE, 0);
5403 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5404 
5405 	for (i = 0; i < 5; i++)
5406 		WREG32(GMCON_PGFSM_WRITE, 0);
5407 
5408 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5409 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5410 
5411 	for (i = 0; i < 5; i++)
5412 		WREG32(GMCON_PGFSM_WRITE, 0);
5413 
5414 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5415 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5416 
5417 	for (i = 0; i < 5; i++)
5418 		WREG32(GMCON_PGFSM_WRITE, 0);
5419 
5420 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5421 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5422 
5423 	for (i = 0; i < 5; i++)
5424 		WREG32(GMCON_PGFSM_WRITE, 0);
5425 
5426 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5427 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5428 
5429 	for (i = 0; i < 5; i++)
5430 		WREG32(GMCON_PGFSM_WRITE, 0);
5431 
5432 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5433 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5434 
5435 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5436 	WREG32(GMCON_MISC, save->gmcon_misc);
5437 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5438 }
5439 
5440 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5441 {
5442 	struct evergreen_mc_save save;
5443 	struct kv_reset_save_regs kv_save = { 0 };
5444 	u32 tmp, i;
5445 
5446 	dev_info(rdev->dev, "GPU pci config reset\n");
5447 
5448 	/* disable dpm? */
5449 
5450 	/* disable cg/pg */
5451 	cik_fini_pg(rdev);
5452 	cik_fini_cg(rdev);
5453 
5454 	/* Disable GFX parsing/prefetching */
5455 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5456 
5457 	/* Disable MEC parsing/prefetching */
5458 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5459 
5460 	/* sdma0 */
5461 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5462 	tmp |= SDMA_HALT;
5463 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5464 	/* sdma1 */
5465 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5466 	tmp |= SDMA_HALT;
5467 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5468 	/* XXX other engines? */
5469 
5470 	/* halt the rlc, disable cp internal ints */
5471 	cik_rlc_stop(rdev);
5472 
5473 	udelay(50);
5474 
5475 	/* disable mem access */
5476 	evergreen_mc_stop(rdev, &save);
5477 	if (evergreen_mc_wait_for_idle(rdev)) {
5478 		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
5479 	}
5480 
5481 	if (rdev->flags & RADEON_IS_IGP)
5482 		kv_save_regs_for_reset(rdev, &kv_save);
5483 
5484 	/* disable BM */
5485 	pci_disable_busmaster(rdev->pdev->dev.bsddev);
5486 	/* reset */
5487 	radeon_pci_config_reset(rdev);
5488 
5489 	udelay(100);
5490 
5491 	/* wait for asic to come out of reset */
5492 	for (i = 0; i < rdev->usec_timeout; i++) {
5493 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5494 			break;
5495 		udelay(1);
5496 	}
5497 
5498 	/* does asic init need to be run first??? */
5499 	if (rdev->flags & RADEON_IS_IGP)
5500 		kv_restore_regs_for_reset(rdev, &kv_save);
5501 }
5502 
5503 /**
5504  * cik_asic_reset - soft reset GPU
5505  *
5506  * @rdev: radeon_device pointer
5507  *
5508  * Look up which blocks are hung and attempt
5509  * to reset them.
5510  * Returns 0 for success.
5511  */
5512 int cik_asic_reset(struct radeon_device *rdev)
5513 {
5514 	u32 reset_mask;
5515 
5516 	reset_mask = cik_gpu_check_soft_reset(rdev);
5517 
5518 	if (reset_mask)
5519 		r600_set_bios_scratch_engine_hung(rdev, true);
5520 
5521 	/* try soft reset */
5522 	cik_gpu_soft_reset(rdev, reset_mask);
5523 
5524 	reset_mask = cik_gpu_check_soft_reset(rdev);
5525 
5526 	/* try pci config reset */
5527 	if (reset_mask && radeon_hard_reset)
5528 		cik_gpu_pci_config_reset(rdev);
5529 
5530 	reset_mask = cik_gpu_check_soft_reset(rdev);
5531 
5532 	if (!reset_mask)
5533 		r600_set_bios_scratch_engine_hung(rdev, false);
5534 
5535 	return 0;
5536 }
5537 
5538 /**
5539  * cik_gfx_is_lockup - check if the 3D engine is locked up
5540  *
5541  * @rdev: radeon_device pointer
5542  * @ring: radeon_ring structure holding ring information
5543  *
5544  * Check if the 3D engine is locked up (CIK).
5545  * Returns true if the engine is locked, false if not.
5546  */
5547 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5548 {
5549 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5550 
5551 	if (!(reset_mask & (RADEON_RESET_GFX |
5552 			    RADEON_RESET_COMPUTE |
5553 			    RADEON_RESET_CP))) {
5554 		radeon_ring_lockup_update(rdev, ring);
5555 		return false;
5556 	}
5557 	return radeon_ring_test_lockup(rdev, ring);
5558 }
5559 
5560 /* MC */
5561 /**
5562  * cik_mc_program - program the GPU memory controller
5563  *
5564  * @rdev: radeon_device pointer
5565  *
5566  * Set the location of vram, gart, and AGP in the GPU's
5567  * physical address space (CIK).
5568  */
5569 static void cik_mc_program(struct radeon_device *rdev)
5570 {
5571 	struct evergreen_mc_save save;
5572 	u32 tmp;
5573 	int i, j;
5574 
5575 	/* Initialize HDP */
5576 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5577 		WREG32((0x2c14 + j), 0x00000000);
5578 		WREG32((0x2c18 + j), 0x00000000);
5579 		WREG32((0x2c1c + j), 0x00000000);
5580 		WREG32((0x2c20 + j), 0x00000000);
5581 		WREG32((0x2c24 + j), 0x00000000);
5582 	}
5583 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5584 
5585 	evergreen_mc_stop(rdev, &save);
5586 	if (radeon_mc_wait_for_idle(rdev)) {
5587 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5588 	}
5589 	/* Lockout access through VGA aperture*/
5590 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5591 	/* Update configuration */
5592 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5593 	       rdev->mc.vram_start >> 12);
5594 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5595 	       rdev->mc.vram_end >> 12);
5596 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5597 	       rdev->vram_scratch.gpu_addr >> 12);
5598 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5599 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5600 	WREG32(MC_VM_FB_LOCATION, tmp);
5601 	/* XXX double check these! */
5602 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5603 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5604 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5605 	WREG32(MC_VM_AGP_BASE, 0);
5606 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5607 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5608 	if (radeon_mc_wait_for_idle(rdev)) {
5609 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5610 	}
5611 	evergreen_mc_resume(rdev, &save);
5612 	/* we need to own VRAM, so turn off the VGA renderer here
5613 	 * to stop it overwriting our objects */
5614 	rv515_vga_render_disable(rdev);
5615 }
5616 
5617 /**
5618  * cik_mc_init - initialize the memory controller driver params
5619  *
5620  * @rdev: radeon_device pointer
5621  *
5622  * Look up the amount of vram, vram width, and decide how to place
5623  * vram and gart within the GPU's physical address space (CIK).
5624  * Returns 0 for success.
5625  */
5626 static int cik_mc_init(struct radeon_device *rdev)
5627 {
5628 	u32 tmp;
5629 	int chansize, numchan;
5630 
5631 	/* Get VRAM informations */
5632 	rdev->mc.vram_is_ddr = true;
5633 	tmp = RREG32(MC_ARB_RAMCFG);
5634 	if (tmp & CHANSIZE_MASK) {
5635 		chansize = 64;
5636 	} else {
5637 		chansize = 32;
5638 	}
5639 	tmp = RREG32(MC_SHARED_CHMAP);
5640 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5641 	case 0:
5642 	default:
5643 		numchan = 1;
5644 		break;
5645 	case 1:
5646 		numchan = 2;
5647 		break;
5648 	case 2:
5649 		numchan = 4;
5650 		break;
5651 	case 3:
5652 		numchan = 8;
5653 		break;
5654 	case 4:
5655 		numchan = 3;
5656 		break;
5657 	case 5:
5658 		numchan = 6;
5659 		break;
5660 	case 6:
5661 		numchan = 10;
5662 		break;
5663 	case 7:
5664 		numchan = 12;
5665 		break;
5666 	case 8:
5667 		numchan = 16;
5668 		break;
5669 	}
5670 	rdev->mc.vram_width = numchan * chansize;
5671 	/* Could aper size report 0 ? */
5672 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5673 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5674 	/* size in MB on si */
5675 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5676 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5677 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5678 	si_vram_gtt_location(rdev, &rdev->mc);
5679 	radeon_update_bandwidth_info(rdev);
5680 
5681 	return 0;
5682 }
5683 
5684 /*
5685  * GART
5686  * VMID 0 is the physical GPU addresses as used by the kernel.
5687  * VMIDs 1-15 are used for userspace clients and are handled
5688  * by the radeon vm/hsa code.
5689  */
5690 /**
5691  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5692  *
5693  * @rdev: radeon_device pointer
5694  *
5695  * Flush the TLB for the VMID 0 page table (CIK).
5696  */
5697 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5698 {
5699 	/* flush hdp cache */
5700 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5701 
5702 	/* bits 0-15 are the VM contexts0-15 */
5703 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5704 }
5705 
5706 /**
5707  * cik_pcie_gart_enable - gart enable
5708  *
5709  * @rdev: radeon_device pointer
5710  *
5711  * This sets up the TLBs, programs the page tables for VMID0,
5712  * sets up the hw for VMIDs 1-15 which are allocated on
5713  * demand, and sets up the global locations for the LDS, GDS,
5714  * and GPUVM for FSA64 clients (CIK).
5715  * Returns 0 for success, errors for failure.
5716  */
5717 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5718 {
5719 	int r, i;
5720 
5721 	if (rdev->gart.robj == NULL) {
5722 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5723 		return -EINVAL;
5724 	}
5725 	r = radeon_gart_table_vram_pin(rdev);
5726 	if (r)
5727 		return r;
5728 	/* Setup TLB control */
5729 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5730 	       (0xA << 7) |
5731 	       ENABLE_L1_TLB |
5732 	       ENABLE_L1_FRAGMENT_PROCESSING |
5733 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5734 	       ENABLE_ADVANCED_DRIVER_MODEL |
5735 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5736 	/* Setup L2 cache */
5737 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5738 	       ENABLE_L2_FRAGMENT_PROCESSING |
5739 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5740 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5741 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5742 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5743 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5744 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5745 	       BANK_SELECT(4) |
5746 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5747 	/* setup context0 */
5748 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5749 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5750 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5751 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5752 			(u32)(rdev->dummy_page.addr >> 12));
5753 	WREG32(VM_CONTEXT0_CNTL2, 0);
5754 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5755 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5756 
5757 	WREG32(0x15D4, 0);
5758 	WREG32(0x15D8, 0);
5759 	WREG32(0x15DC, 0);
5760 
5761 	/* restore context1-15 */
5762 	/* set vm size, must be a multiple of 4 */
5763 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5764 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5765 	for (i = 1; i < 16; i++) {
5766 		if (i < 8)
5767 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5768 			       rdev->vm_manager.saved_table_addr[i]);
5769 		else
5770 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5771 			       rdev->vm_manager.saved_table_addr[i]);
5772 	}
5773 
5774 	/* enable context1-15 */
5775 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5776 	       (u32)(rdev->dummy_page.addr >> 12));
5777 	WREG32(VM_CONTEXT1_CNTL2, 4);
5778 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5779 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5780 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5781 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5782 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5783 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5784 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5785 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5786 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5787 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5788 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5789 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5790 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5791 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5792 
5793 	if (rdev->family == CHIP_KAVERI) {
5794 		u32 tmp = RREG32(CHUB_CONTROL);
5795 		tmp &= ~BYPASS_VM;
5796 		WREG32(CHUB_CONTROL, tmp);
5797 	}
5798 
5799 	/* XXX SH_MEM regs */
5800 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5801 	spin_lock(&rdev->srbm_mutex);
5802 	for (i = 0; i < 16; i++) {
5803 		cik_srbm_select(rdev, 0, 0, 0, i);
5804 		/* CP and shaders */
5805 		WREG32(SH_MEM_CONFIG, 0);
5806 		WREG32(SH_MEM_APE1_BASE, 1);
5807 		WREG32(SH_MEM_APE1_LIMIT, 0);
5808 		WREG32(SH_MEM_BASES, 0);
5809 		/* SDMA GFX */
5810 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5811 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5812 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5813 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5814 		/* XXX SDMA RLC - todo */
5815 	}
5816 	cik_srbm_select(rdev, 0, 0, 0, 0);
5817 	spin_unlock(&rdev->srbm_mutex);
5818 
5819 	cik_pcie_gart_tlb_flush(rdev);
5820 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5821 		 (unsigned)(rdev->mc.gtt_size >> 20),
5822 		 (unsigned long long)rdev->gart.table_addr);
5823 	rdev->gart.ready = true;
5824 	return 0;
5825 }
5826 
5827 /**
5828  * cik_pcie_gart_disable - gart disable
5829  *
5830  * @rdev: radeon_device pointer
5831  *
5832  * This disables all VM page table (CIK).
5833  */
5834 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5835 {
5836 	unsigned i;
5837 
5838 	for (i = 1; i < 16; ++i) {
5839 		uint32_t reg;
5840 		if (i < 8)
5841 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5842 		else
5843 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5844 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5845 	}
5846 
5847 	/* Disable all tables */
5848 	WREG32(VM_CONTEXT0_CNTL, 0);
5849 	WREG32(VM_CONTEXT1_CNTL, 0);
5850 	/* Setup TLB control */
5851 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5852 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5853 	/* Setup L2 cache */
5854 	WREG32(VM_L2_CNTL,
5855 	       ENABLE_L2_FRAGMENT_PROCESSING |
5856 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5857 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5858 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5859 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5860 	WREG32(VM_L2_CNTL2, 0);
5861 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5862 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5863 	radeon_gart_table_vram_unpin(rdev);
5864 }
5865 
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART, frees the VRAM-backed GART table and then
 * tears down the common GART state (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* hardware off first, then release the table and driver state */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5879 
5880 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK relies on hardware IB checking, so nothing is inspected here.
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* intentionally a no-op */
	return 0;
}
5893 
5894 /*
5895  * vm
5896  * VMID 0 is the physical GPU addresses as used by the kernel.
5897  * VMIDs 1-15 are used for userspace clients and are handled
5898  * by the radeon vm/hsa code.
5899  */
5900 /**
5901  * cik_vm_init - cik vm init callback
5902  *
5903  * @rdev: radeon_device pointer
5904  *
5905  * Inits cik specific vm parameters (number of VMs, base of vram for
5906  * VMIDs 1-15) (CIK).
5907  * Returns 0 for success.
5908  */
5909 int cik_vm_init(struct radeon_device *rdev)
5910 {
5911 	/* number of VMs */
5912 	rdev->vm_manager.nvm = 16;
5913 	/* base offset of vram pages */
5914 	if (rdev->flags & RADEON_IS_IGP) {
5915 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5916 		tmp <<= 22;
5917 		rdev->vm_manager.vram_base_offset = tmp;
5918 	} else
5919 		rdev->vm_manager.vram_base_offset = 0;
5920 
5921 	return 0;
5922 }
5923 
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Counterpart of cik_vm_init(); there is no asic specific VM state
 * to tear down, so this does nothing (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* nothing to release */
}
5934 
5935 /**
5936  * cik_vm_decode_fault - print human readable fault info
5937  *
5938  * @rdev: radeon_device pointer
5939  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5940  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5941  *
5942  * Print human readable fault information (CIK).
5943  */
5944 static void cik_vm_decode_fault(struct radeon_device *rdev,
5945 				u32 status, u32 addr, u32 mc_client)
5946 {
5947 	u32 mc_id;
5948 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5949 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5950 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5951 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5952 
5953 	if (rdev->family == CHIP_HAWAII)
5954 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5955 	else
5956 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5957 
5958 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5959 	       protections, vmid, addr,
5960 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5961 	       block, mc_client, mc_id);
5962 }
5963 
5964 /**
5965  * cik_vm_flush - cik vm flush using the CP
5966  *
5967  * @rdev: radeon_device pointer
5968  *
5969  * Update the page table base and flush the VM TLB
5970  * using the CP (CIK).
5971  */
5972 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5973 {
5974 	struct radeon_ring *ring = &rdev->ring[ridx];
5975 	int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
5976 
5977 	if (vm == NULL)
5978 		return;
5979 
5980 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5981 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5982 				 WRITE_DATA_DST_SEL(0)));
5983 	if (vm->id < 8) {
5984 		radeon_ring_write(ring,
5985 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5986 	} else {
5987 		radeon_ring_write(ring,
5988 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5989 	}
5990 	radeon_ring_write(ring, 0);
5991 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5992 
5993 	/* update SH_MEM_* regs */
5994 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5995 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5996 				 WRITE_DATA_DST_SEL(0)));
5997 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5998 	radeon_ring_write(ring, 0);
5999 	radeon_ring_write(ring, VMID(vm->id));
6000 
6001 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6002 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6003 				 WRITE_DATA_DST_SEL(0)));
6004 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6005 	radeon_ring_write(ring, 0);
6006 
6007 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6008 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6009 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6010 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6011 
6012 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6013 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6014 				 WRITE_DATA_DST_SEL(0)));
6015 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6016 	radeon_ring_write(ring, 0);
6017 	radeon_ring_write(ring, VMID(0));
6018 
6019 	/* HDP flush */
6020 	cik_hdp_flush_cp_ring_emit(rdev, ridx);
6021 
6022 	/* bits 0-15 are the VM contexts0-15 */
6023 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6024 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6025 				 WRITE_DATA_DST_SEL(0)));
6026 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6027 	radeon_ring_write(ring, 0);
6028 	radeon_ring_write(ring, 1 << vm->id);
6029 
6030 	/* compute doesn't have PFP */
6031 	if (usepfp) {
6032 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6033 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6034 		radeon_ring_write(ring, 0x0);
6035 	}
6036 }
6037 
6038 /*
6039  * RLC
6040  * The RLC is a multi-purpose microengine that handles a
6041  * variety of functions, the most important of which is
6042  * the interrupt controller.
6043  */
6044 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6045 					  bool enable)
6046 {
6047 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6048 
6049 	if (enable)
6050 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6051 	else
6052 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6053 	WREG32(CP_INT_CNTL_RING0, tmp);
6054 }
6055 
6056 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6057 {
6058 	u32 tmp;
6059 
6060 	tmp = RREG32(RLC_LB_CNTL);
6061 	if (enable)
6062 		tmp |= LOAD_BALANCE_ENABLE;
6063 	else
6064 		tmp &= ~LOAD_BALANCE_ENABLE;
6065 	WREG32(RLC_LB_CNTL, tmp);
6066 }
6067 
6068 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6069 {
6070 	u32 i, j, k;
6071 	u32 mask;
6072 
6073 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6074 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6075 			cik_select_se_sh(rdev, i, j);
6076 			for (k = 0; k < rdev->usec_timeout; k++) {
6077 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6078 					break;
6079 				udelay(1);
6080 			}
6081 		}
6082 	}
6083 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6084 
6085 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6086 	for (k = 0; k < rdev->usec_timeout; k++) {
6087 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6088 			break;
6089 		udelay(1);
6090 	}
6091 }
6092 
6093 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6094 {
6095 	u32 tmp;
6096 
6097 	tmp = RREG32(RLC_CNTL);
6098 	if (tmp != rlc)
6099 		WREG32(RLC_CNTL, rlc);
6100 }
6101 
6102 static u32 cik_halt_rlc(struct radeon_device *rdev)
6103 {
6104 	u32 data, orig;
6105 
6106 	orig = data = RREG32(RLC_CNTL);
6107 
6108 	if (data & RLC_ENABLE) {
6109 		u32 i;
6110 
6111 		data &= ~RLC_ENABLE;
6112 		WREG32(RLC_CNTL, data);
6113 
6114 		for (i = 0; i < rdev->usec_timeout; i++) {
6115 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6116 				break;
6117 			udelay(1);
6118 		}
6119 
6120 		cik_wait_for_rlc_serdes(rdev);
6121 	}
6122 
6123 	return orig;
6124 }
6125 
6126 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6127 {
6128 	u32 tmp, i, mask;
6129 
6130 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6131 	WREG32(RLC_GPR_REG2, tmp);
6132 
6133 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6134 	for (i = 0; i < rdev->usec_timeout; i++) {
6135 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6136 			break;
6137 		udelay(1);
6138 	}
6139 
6140 	for (i = 0; i < rdev->usec_timeout; i++) {
6141 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6142 			break;
6143 		udelay(1);
6144 	}
6145 }
6146 
6147 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6148 {
6149 	u32 tmp;
6150 
6151 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6152 	WREG32(RLC_GPR_REG2, tmp);
6153 }
6154 
6155 /**
6156  * cik_rlc_stop - stop the RLC ME
6157  *
6158  * @rdev: radeon_device pointer
6159  *
6160  * Halt the RLC ME (MicroEngine) (CIK).
6161  */
6162 static void cik_rlc_stop(struct radeon_device *rdev)
6163 {
6164 	WREG32(RLC_CNTL, 0);
6165 
6166 	cik_enable_gui_idle_interrupt(rdev, false);
6167 
6168 	cik_wait_for_rlc_serdes(rdev);
6169 }
6170 
6171 /**
6172  * cik_rlc_start - start the RLC ME
6173  *
6174  * @rdev: radeon_device pointer
6175  *
6176  * Unhalt the RLC ME (MicroEngine) (CIK).
6177  */
6178 static void cik_rlc_start(struct radeon_device *rdev)
6179 {
6180 	WREG32(RLC_CNTL, RLC_ENABLE);
6181 
6182 	cik_enable_gui_idle_interrupt(rdev, true);
6183 
6184 	udelay(50);
6185 }
6186 
6187 /**
6188  * cik_rlc_resume - setup the RLC hw
6189  *
6190  * @rdev: radeon_device pointer
6191  *
6192  * Initialize the RLC registers, load the ucode,
6193  * and start the RLC (CIK).
6194  * Returns 0 for success, -EINVAL if the ucode is not available.
6195  */
6196 static int cik_rlc_resume(struct radeon_device *rdev)
6197 {
6198 	u32 i, size, tmp;
6199 
6200 	if (!rdev->rlc_fw)
6201 		return -EINVAL;
6202 
6203 	cik_rlc_stop(rdev);
6204 
6205 	/* disable CG */
6206 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6207 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6208 
6209 	si_rlc_reset(rdev);
6210 
6211 	cik_init_pg(rdev);
6212 
6213 	cik_init_cg(rdev);
6214 
6215 	WREG32(RLC_LB_CNTR_INIT, 0);
6216 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6217 
6218 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6219 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6220 	WREG32(RLC_LB_PARAMS, 0x00600408);
6221 	WREG32(RLC_LB_CNTL, 0x80000004);
6222 
6223 	WREG32(RLC_MC_CNTL, 0);
6224 	WREG32(RLC_UCODE_CNTL, 0);
6225 
6226 	if (rdev->new_fw) {
6227 		const struct rlc_firmware_header_v1_0 *hdr =
6228 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6229 		const __le32 *fw_data = (const __le32 *)
6230 			((const char *)rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6231 
6232 		radeon_ucode_print_rlc_hdr(&hdr->header);
6233 
6234 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6235 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6236 		for (i = 0; i < size; i++)
6237 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6238 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6239 	} else {
6240 		const __be32 *fw_data;
6241 
6242 		switch (rdev->family) {
6243 		case CHIP_BONAIRE:
6244 		case CHIP_HAWAII:
6245 		default:
6246 			size = BONAIRE_RLC_UCODE_SIZE;
6247 			break;
6248 		case CHIP_KAVERI:
6249 			size = KV_RLC_UCODE_SIZE;
6250 			break;
6251 		case CHIP_KABINI:
6252 			size = KB_RLC_UCODE_SIZE;
6253 			break;
6254 		case CHIP_MULLINS:
6255 			size = ML_RLC_UCODE_SIZE;
6256 			break;
6257 		}
6258 
6259 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6260 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6261 		for (i = 0; i < size; i++)
6262 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6263 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6264 	}
6265 
6266 	/* XXX - find out what chips support lbpw */
6267 	cik_enable_lbpw(rdev, false);
6268 
6269 	if (rdev->family == CHIP_BONAIRE)
6270 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6271 
6272 	cik_rlc_start(rdev);
6273 
6274 	return 0;
6275 }
6276 
6277 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6278 {
6279 	u32 data, orig, tmp, tmp2;
6280 
6281 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6282 
6283 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6284 		cik_enable_gui_idle_interrupt(rdev, true);
6285 
6286 		tmp = cik_halt_rlc(rdev);
6287 
6288 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6289 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6290 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6291 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6292 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6293 
6294 		cik_update_rlc(rdev, tmp);
6295 
6296 		data |= CGCG_EN | CGLS_EN;
6297 	} else {
6298 		cik_enable_gui_idle_interrupt(rdev, false);
6299 
6300 		RREG32(CB_CGTT_SCLK_CTRL);
6301 		RREG32(CB_CGTT_SCLK_CTRL);
6302 		RREG32(CB_CGTT_SCLK_CTRL);
6303 		RREG32(CB_CGTT_SCLK_CTRL);
6304 
6305 		data &= ~(CGCG_EN | CGLS_EN);
6306 	}
6307 
6308 	if (orig != data)
6309 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6310 
6311 }
6312 
6313 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6314 {
6315 	u32 data, orig, tmp = 0;
6316 
6317 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6318 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6319 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6320 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6321 				data |= CP_MEM_LS_EN;
6322 				if (orig != data)
6323 					WREG32(CP_MEM_SLP_CNTL, data);
6324 			}
6325 		}
6326 
6327 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6328 		data &= 0xfffffffd;
6329 		if (orig != data)
6330 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6331 
6332 		tmp = cik_halt_rlc(rdev);
6333 
6334 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6335 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6336 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6337 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6338 		WREG32(RLC_SERDES_WR_CTRL, data);
6339 
6340 		cik_update_rlc(rdev, tmp);
6341 
6342 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6343 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6344 			data &= ~SM_MODE_MASK;
6345 			data |= SM_MODE(0x2);
6346 			data |= SM_MODE_ENABLE;
6347 			data &= ~CGTS_OVERRIDE;
6348 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6349 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6350 				data &= ~CGTS_LS_OVERRIDE;
6351 			data &= ~ON_MONITOR_ADD_MASK;
6352 			data |= ON_MONITOR_ADD_EN;
6353 			data |= ON_MONITOR_ADD(0x96);
6354 			if (orig != data)
6355 				WREG32(CGTS_SM_CTRL_REG, data);
6356 		}
6357 	} else {
6358 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6359 		data |= 0x00000002;
6360 		if (orig != data)
6361 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6362 
6363 		data = RREG32(RLC_MEM_SLP_CNTL);
6364 		if (data & RLC_MEM_LS_EN) {
6365 			data &= ~RLC_MEM_LS_EN;
6366 			WREG32(RLC_MEM_SLP_CNTL, data);
6367 		}
6368 
6369 		data = RREG32(CP_MEM_SLP_CNTL);
6370 		if (data & CP_MEM_LS_EN) {
6371 			data &= ~CP_MEM_LS_EN;
6372 			WREG32(CP_MEM_SLP_CNTL, data);
6373 		}
6374 
6375 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6376 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6377 		if (orig != data)
6378 			WREG32(CGTS_SM_CTRL_REG, data);
6379 
6380 		tmp = cik_halt_rlc(rdev);
6381 
6382 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6383 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6384 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6385 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6386 		WREG32(RLC_SERDES_WR_CTRL, data);
6387 
6388 		cik_update_rlc(rdev, tmp);
6389 	}
6390 }
6391 
6392 static const u32 mc_cg_registers[] =
6393 {
6394 	MC_HUB_MISC_HUB_CG,
6395 	MC_HUB_MISC_SIP_CG,
6396 	MC_HUB_MISC_VM_CG,
6397 	MC_XPB_CLK_GAT,
6398 	ATC_MISC_CG,
6399 	MC_CITF_MISC_WR_CG,
6400 	MC_CITF_MISC_RD_CG,
6401 	MC_CITF_MISC_VM_CG,
6402 	VM_L2_CG,
6403 };
6404 
6405 static void cik_enable_mc_ls(struct radeon_device *rdev,
6406 			     bool enable)
6407 {
6408 	int i;
6409 	u32 orig, data;
6410 
6411 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6412 		orig = data = RREG32(mc_cg_registers[i]);
6413 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6414 			data |= MC_LS_ENABLE;
6415 		else
6416 			data &= ~MC_LS_ENABLE;
6417 		if (data != orig)
6418 			WREG32(mc_cg_registers[i], data);
6419 	}
6420 }
6421 
6422 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6423 			       bool enable)
6424 {
6425 	int i;
6426 	u32 orig, data;
6427 
6428 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6429 		orig = data = RREG32(mc_cg_registers[i]);
6430 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6431 			data |= MC_CG_ENABLE;
6432 		else
6433 			data &= ~MC_CG_ENABLE;
6434 		if (data != orig)
6435 			WREG32(mc_cg_registers[i], data);
6436 	}
6437 }
6438 
6439 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6440 				 bool enable)
6441 {
6442 	u32 orig, data;
6443 
6444 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6445 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6446 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6447 	} else {
6448 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6449 		data |= 0xff000000;
6450 		if (data != orig)
6451 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6452 
6453 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6454 		data |= 0xff000000;
6455 		if (data != orig)
6456 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6457 	}
6458 }
6459 
6460 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6461 				 bool enable)
6462 {
6463 	u32 orig, data;
6464 
6465 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6466 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6467 		data |= 0x100;
6468 		if (orig != data)
6469 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6470 
6471 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6472 		data |= 0x100;
6473 		if (orig != data)
6474 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6475 	} else {
6476 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6477 		data &= ~0x100;
6478 		if (orig != data)
6479 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6480 
6481 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6482 		data &= ~0x100;
6483 		if (orig != data)
6484 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6485 	}
6486 }
6487 
6488 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6489 				bool enable)
6490 {
6491 	u32 orig, data;
6492 
6493 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6494 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6495 		data = 0xfff;
6496 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6497 
6498 		orig = data = RREG32(UVD_CGC_CTRL);
6499 		data |= DCM;
6500 		if (orig != data)
6501 			WREG32(UVD_CGC_CTRL, data);
6502 	} else {
6503 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6504 		data &= ~0xfff;
6505 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6506 
6507 		orig = data = RREG32(UVD_CGC_CTRL);
6508 		data &= ~DCM;
6509 		if (orig != data)
6510 			WREG32(UVD_CGC_CTRL, data);
6511 	}
6512 }
6513 
6514 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6515 			       bool enable)
6516 {
6517 	u32 orig, data;
6518 
6519 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6520 
6521 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6522 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6523 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6524 	else
6525 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6526 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6527 
6528 	if (orig != data)
6529 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6530 }
6531 
6532 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6533 				bool enable)
6534 {
6535 	u32 orig, data;
6536 
6537 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6538 
6539 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6540 		data &= ~CLOCK_GATING_DIS;
6541 	else
6542 		data |= CLOCK_GATING_DIS;
6543 
6544 	if (orig != data)
6545 		WREG32(HDP_HOST_PATH_CNTL, data);
6546 }
6547 
6548 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6549 			      bool enable)
6550 {
6551 	u32 orig, data;
6552 
6553 	orig = data = RREG32(HDP_MEM_POWER_LS);
6554 
6555 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6556 		data |= HDP_LS_ENABLE;
6557 	else
6558 		data &= ~HDP_LS_ENABLE;
6559 
6560 	if (orig != data)
6561 		WREG32(HDP_MEM_POWER_LS, data);
6562 }
6563 
6564 void cik_update_cg(struct radeon_device *rdev,
6565 		   u32 block, bool enable)
6566 {
6567 
6568 	if (block & RADEON_CG_BLOCK_GFX) {
6569 		cik_enable_gui_idle_interrupt(rdev, false);
6570 		/* order matters! */
6571 		if (enable) {
6572 			cik_enable_mgcg(rdev, true);
6573 			cik_enable_cgcg(rdev, true);
6574 		} else {
6575 			cik_enable_cgcg(rdev, false);
6576 			cik_enable_mgcg(rdev, false);
6577 		}
6578 		cik_enable_gui_idle_interrupt(rdev, true);
6579 	}
6580 
6581 	if (block & RADEON_CG_BLOCK_MC) {
6582 		if (!(rdev->flags & RADEON_IS_IGP)) {
6583 			cik_enable_mc_mgcg(rdev, enable);
6584 			cik_enable_mc_ls(rdev, enable);
6585 		}
6586 	}
6587 
6588 	if (block & RADEON_CG_BLOCK_SDMA) {
6589 		cik_enable_sdma_mgcg(rdev, enable);
6590 		cik_enable_sdma_mgls(rdev, enable);
6591 	}
6592 
6593 	if (block & RADEON_CG_BLOCK_BIF) {
6594 		cik_enable_bif_mgls(rdev, enable);
6595 	}
6596 
6597 	if (block & RADEON_CG_BLOCK_UVD) {
6598 		if (rdev->has_uvd)
6599 			cik_enable_uvd_mgcg(rdev, enable);
6600 	}
6601 
6602 	if (block & RADEON_CG_BLOCK_HDP) {
6603 		cik_enable_hdp_mgcg(rdev, enable);
6604 		cik_enable_hdp_ls(rdev, enable);
6605 	}
6606 
6607 	if (block & RADEON_CG_BLOCK_VCE) {
6608 		vce_v2_0_enable_mgcg(rdev, enable);
6609 	}
6610 }
6611 
6612 static void cik_init_cg(struct radeon_device *rdev)
6613 {
6614 
6615 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6616 
6617 	if (rdev->has_uvd)
6618 		si_init_uvd_internal_cg(rdev);
6619 
6620 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6621 			     RADEON_CG_BLOCK_SDMA |
6622 			     RADEON_CG_BLOCK_BIF |
6623 			     RADEON_CG_BLOCK_UVD |
6624 			     RADEON_CG_BLOCK_HDP), true);
6625 }
6626 
6627 static void cik_fini_cg(struct radeon_device *rdev)
6628 {
6629 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6630 			     RADEON_CG_BLOCK_SDMA |
6631 			     RADEON_CG_BLOCK_BIF |
6632 			     RADEON_CG_BLOCK_UVD |
6633 			     RADEON_CG_BLOCK_HDP), false);
6634 
6635 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6636 }
6637 
6638 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6639 					  bool enable)
6640 {
6641 	u32 data, orig;
6642 
6643 	orig = data = RREG32(RLC_PG_CNTL);
6644 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6645 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6646 	else
6647 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6648 	if (orig != data)
6649 		WREG32(RLC_PG_CNTL, data);
6650 }
6651 
6652 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6653 					  bool enable)
6654 {
6655 	u32 data, orig;
6656 
6657 	orig = data = RREG32(RLC_PG_CNTL);
6658 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6659 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6660 	else
6661 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6662 	if (orig != data)
6663 		WREG32(RLC_PG_CNTL, data);
6664 }
6665 
6666 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6667 {
6668 	u32 data, orig;
6669 
6670 	orig = data = RREG32(RLC_PG_CNTL);
6671 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6672 		data &= ~DISABLE_CP_PG;
6673 	else
6674 		data |= DISABLE_CP_PG;
6675 	if (orig != data)
6676 		WREG32(RLC_PG_CNTL, data);
6677 }
6678 
6679 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6680 {
6681 	u32 data, orig;
6682 
6683 	orig = data = RREG32(RLC_PG_CNTL);
6684 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6685 		data &= ~DISABLE_GDS_PG;
6686 	else
6687 		data |= DISABLE_GDS_PG;
6688 	if (orig != data)
6689 		WREG32(RLC_PG_CNTL, data);
6690 }
6691 
6692 #define CP_ME_TABLE_SIZE    96
6693 #define CP_ME_TABLE_OFFSET  2048
6694 #define CP_MEC_TABLE_OFFSET 4096
6695 
6696 void cik_init_cp_pg_table(struct radeon_device *rdev)
6697 {
6698 	volatile u32 *dst_ptr;
6699 	int me, i, max_me = 4;
6700 	u32 bo_offset = 0;
6701 	u32 table_offset, table_size;
6702 
6703 	if (rdev->family == CHIP_KAVERI)
6704 		max_me = 5;
6705 
6706 	if (rdev->rlc.cp_table_ptr == NULL)
6707 		return;
6708 
6709 	/* write the cp table buffer */
6710 	dst_ptr = rdev->rlc.cp_table_ptr;
6711 	for (me = 0; me < max_me; me++) {
6712 		if (rdev->new_fw) {
6713 			const __le32 *fw_data;
6714 			const struct gfx_firmware_header_v1_0 *hdr;
6715 
6716 			if (me == 0) {
6717 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6718 				fw_data = (const __le32 *)
6719 					((const char *)rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6720 				table_offset = le32_to_cpu(hdr->jt_offset);
6721 				table_size = le32_to_cpu(hdr->jt_size);
6722 			} else if (me == 1) {
6723 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6724 				fw_data = (const __le32 *)
6725 					((const char *)rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6726 				table_offset = le32_to_cpu(hdr->jt_offset);
6727 				table_size = le32_to_cpu(hdr->jt_size);
6728 			} else if (me == 2) {
6729 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6730 				fw_data = (const __le32 *)
6731 					((const char *)rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6732 				table_offset = le32_to_cpu(hdr->jt_offset);
6733 				table_size = le32_to_cpu(hdr->jt_size);
6734 			} else if (me == 3) {
6735 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6736 				fw_data = (const __le32 *)
6737 					((const char *)rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6738 				table_offset = le32_to_cpu(hdr->jt_offset);
6739 				table_size = le32_to_cpu(hdr->jt_size);
6740 			} else {
6741 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6742 				fw_data = (const __le32 *)
6743 					((const char *)rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6744 				table_offset = le32_to_cpu(hdr->jt_offset);
6745 				table_size = le32_to_cpu(hdr->jt_size);
6746 			}
6747 
6748 			for (i = 0; i < table_size; i ++) {
6749 				dst_ptr[bo_offset + i] =
6750 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6751 			}
6752 			bo_offset += table_size;
6753 		} else {
6754 			const __be32 *fw_data;
6755 			table_size = CP_ME_TABLE_SIZE;
6756 
6757 			if (me == 0) {
6758 				fw_data = (const __be32 *)rdev->ce_fw->data;
6759 				table_offset = CP_ME_TABLE_OFFSET;
6760 			} else if (me == 1) {
6761 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6762 				table_offset = CP_ME_TABLE_OFFSET;
6763 			} else if (me == 2) {
6764 				fw_data = (const __be32 *)rdev->me_fw->data;
6765 				table_offset = CP_ME_TABLE_OFFSET;
6766 			} else {
6767 				fw_data = (const __be32 *)rdev->mec_fw->data;
6768 				table_offset = CP_MEC_TABLE_OFFSET;
6769 			}
6770 
6771 			for (i = 0; i < table_size; i ++) {
6772 				dst_ptr[bo_offset + i] =
6773 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6774 			}
6775 			bo_offset += table_size;
6776 		}
6777 	}
6778 }
6779 
6780 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6781 				bool enable)
6782 {
6783 	u32 data, orig;
6784 
6785 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6786 		orig = data = RREG32(RLC_PG_CNTL);
6787 		data |= GFX_PG_ENABLE;
6788 		if (orig != data)
6789 			WREG32(RLC_PG_CNTL, data);
6790 
6791 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6792 		data |= AUTO_PG_EN;
6793 		if (orig != data)
6794 			WREG32(RLC_AUTO_PG_CTRL, data);
6795 	} else {
6796 		orig = data = RREG32(RLC_PG_CNTL);
6797 		data &= ~GFX_PG_ENABLE;
6798 		if (orig != data)
6799 			WREG32(RLC_PG_CNTL, data);
6800 
6801 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6802 		data &= ~AUTO_PG_EN;
6803 		if (orig != data)
6804 			WREG32(RLC_AUTO_PG_CTRL, data);
6805 
6806 		data = RREG32(DB_RENDER_CONTROL);
6807 	}
6808 }
6809 
6810 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6811 {
6812 	u32 mask = 0, tmp, tmp1;
6813 	int i;
6814 
6815 	cik_select_se_sh(rdev, se, sh);
6816 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6817 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6818 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6819 
6820 	tmp &= 0xffff0000;
6821 
6822 	tmp |= tmp1;
6823 	tmp >>= 16;
6824 
6825 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6826 		mask <<= 1;
6827 		mask |= 1;
6828 	}
6829 
6830 	return (~tmp) & mask;
6831 }
6832 
6833 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6834 {
6835 	u32 i, j, k, active_cu_number = 0;
6836 	u32 mask, counter, cu_bitmap;
6837 	u32 tmp = 0;
6838 
6839 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6840 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6841 			mask = 1;
6842 			cu_bitmap = 0;
6843 			counter = 0;
6844 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6845 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6846 					if (counter < 2)
6847 						cu_bitmap |= mask;
6848 					counter ++;
6849 				}
6850 				mask <<= 1;
6851 			}
6852 
6853 			active_cu_number += counter;
6854 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6855 		}
6856 	}
6857 
6858 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6859 
6860 	tmp = RREG32(RLC_MAX_PG_CU);
6861 	tmp &= ~MAX_PU_CU_MASK;
6862 	tmp |= MAX_PU_CU(active_cu_number);
6863 	WREG32(RLC_MAX_PG_CU, tmp);
6864 }
6865 
6866 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6867 				       bool enable)
6868 {
6869 	u32 data, orig;
6870 
6871 	orig = data = RREG32(RLC_PG_CNTL);
6872 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6873 		data |= STATIC_PER_CU_PG_ENABLE;
6874 	else
6875 		data &= ~STATIC_PER_CU_PG_ENABLE;
6876 	if (orig != data)
6877 		WREG32(RLC_PG_CNTL, data);
6878 }
6879 
6880 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6881 					bool enable)
6882 {
6883 	u32 data, orig;
6884 
6885 	orig = data = RREG32(RLC_PG_CNTL);
6886 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6887 		data |= DYN_PER_CU_PG_ENABLE;
6888 	else
6889 		data &= ~DYN_PER_CU_PG_ENABLE;
6890 	if (orig != data)
6891 		WREG32(RLC_PG_CNTL, data);
6892 }
6893 
6894 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6895 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6896 
6897 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6898 {
6899 	u32 data, orig;
6900 	u32 i;
6901 
6902 	if (rdev->rlc.cs_data) {
6903 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6904 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6905 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6906 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6907 	} else {
6908 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6909 		for (i = 0; i < 3; i++)
6910 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6911 	}
6912 	if (rdev->rlc.reg_list) {
6913 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6914 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6915 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6916 	}
6917 
6918 	orig = data = RREG32(RLC_PG_CNTL);
6919 	data |= GFX_PG_SRC;
6920 	if (orig != data)
6921 		WREG32(RLC_PG_CNTL, data);
6922 
6923 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6924 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6925 
6926 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6927 	data &= ~IDLE_POLL_COUNT_MASK;
6928 	data |= IDLE_POLL_COUNT(0x60);
6929 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6930 
6931 	data = 0x10101010;
6932 	WREG32(RLC_PG_DELAY, data);
6933 
6934 	data = RREG32(RLC_PG_DELAY_2);
6935 	data &= ~0xff;
6936 	data |= 0x3;
6937 	WREG32(RLC_PG_DELAY_2, data);
6938 
6939 	data = RREG32(RLC_AUTO_PG_CTRL);
6940 	data &= ~GRBM_REG_SGIT_MASK;
6941 	data |= GRBM_REG_SGIT(0x700);
6942 	WREG32(RLC_AUTO_PG_CTRL, data);
6943 
6944 }
6945 
6946 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6947 {
6948 	cik_enable_gfx_cgpg(rdev, enable);
6949 	cik_enable_gfx_static_mgpg(rdev, enable);
6950 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6951 }
6952 
6953 u32 cik_get_csb_size(struct radeon_device *rdev)
6954 {
6955 	u32 count = 0;
6956 	const struct cs_section_def *sect = NULL;
6957 	const struct cs_extent_def *ext = NULL;
6958 
6959 	if (rdev->rlc.cs_data == NULL)
6960 		return 0;
6961 
6962 	/* begin clear state */
6963 	count += 2;
6964 	/* context control state */
6965 	count += 3;
6966 
6967 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6968 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6969 			if (sect->id == SECT_CONTEXT)
6970 				count += 2 + ext->reg_count;
6971 			else
6972 				return 0;
6973 		}
6974 	}
6975 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6976 	count += 4;
6977 	/* end clear state */
6978 	count += 2;
6979 	/* clear state */
6980 	count += 2;
6981 
6982 	return count;
6983 }
6984 
6985 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6986 {
6987 	u32 count = 0, i;
6988 	const struct cs_section_def *sect = NULL;
6989 	const struct cs_extent_def *ext = NULL;
6990 
6991 	if (rdev->rlc.cs_data == NULL)
6992 		return;
6993 	if (buffer == NULL)
6994 		return;
6995 
6996 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6997 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6998 
6999 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7000 	buffer[count++] = cpu_to_le32(0x80000000);
7001 	buffer[count++] = cpu_to_le32(0x80000000);
7002 
7003 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7004 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7005 			if (sect->id == SECT_CONTEXT) {
7006 				buffer[count++] =
7007 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7008 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7009 				for (i = 0; i < ext->reg_count; i++)
7010 					buffer[count++] = cpu_to_le32(ext->extent[i]);
7011 			} else {
7012 				return;
7013 			}
7014 		}
7015 	}
7016 
7017 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7018 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7019 	switch (rdev->family) {
7020 	case CHIP_BONAIRE:
7021 		buffer[count++] = cpu_to_le32(0x16000012);
7022 		buffer[count++] = cpu_to_le32(0x00000000);
7023 		break;
7024 	case CHIP_KAVERI:
7025 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7026 		buffer[count++] = cpu_to_le32(0x00000000);
7027 		break;
7028 	case CHIP_KABINI:
7029 	case CHIP_MULLINS:
7030 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7031 		buffer[count++] = cpu_to_le32(0x00000000);
7032 		break;
7033 	case CHIP_HAWAII:
7034 		buffer[count++] = cpu_to_le32(0x3a00161a);
7035 		buffer[count++] = cpu_to_le32(0x0000002e);
7036 		break;
7037 	default:
7038 		buffer[count++] = cpu_to_le32(0x00000000);
7039 		buffer[count++] = cpu_to_le32(0x00000000);
7040 		break;
7041 	}
7042 
7043 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7044 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7045 
7046 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7047 	buffer[count++] = cpu_to_le32(0);
7048 }
7049 
7050 static void cik_init_pg(struct radeon_device *rdev)
7051 {
7052 	if (rdev->pg_flags) {
7053 		cik_enable_sck_slowdown_on_pu(rdev, true);
7054 		cik_enable_sck_slowdown_on_pd(rdev, true);
7055 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7056 			cik_init_gfx_cgpg(rdev);
7057 			cik_enable_cp_pg(rdev, true);
7058 			cik_enable_gds_pg(rdev, true);
7059 		}
7060 		cik_init_ao_cu_mask(rdev);
7061 		cik_update_gfx_pg(rdev, true);
7062 	}
7063 }
7064 
7065 static void cik_fini_pg(struct radeon_device *rdev)
7066 {
7067 	if (rdev->pg_flags) {
7068 		cik_update_gfx_pg(rdev, false);
7069 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7070 			cik_enable_cp_pg(rdev, false);
7071 			cik_enable_gds_pg(rdev, false);
7072 		}
7073 	}
7074 }
7075 
7076 /*
7077  * Interrupts
7078  * Starting with r6xx, interrupts are handled via a ring buffer.
7079  * Ring buffers are areas of GPU accessible memory that the GPU
7080  * writes interrupt vectors into and the host reads vectors out of.
7081  * There is a rptr (read pointer) that determines where the
7082  * host is currently reading, and a wptr (write pointer)
7083  * which determines where the GPU has written.  When the
7084  * pointers are equal, the ring is idle.  When the GPU
7085  * writes vectors to the ring buffer, it increments the
7086  * wptr.  When there is an interrupt, the host then starts
7087  * fetching commands and processing them until the pointers are
7088  * equal again at which point it updates the rptr.
7089  */
7090 
7091 /**
7092  * cik_enable_interrupts - Enable the interrupt ring buffer
7093  *
7094  * @rdev: radeon_device pointer
7095  *
7096  * Enable the interrupt ring buffer (CIK).
7097  */
7098 static void cik_enable_interrupts(struct radeon_device *rdev)
7099 {
7100 	u32 ih_cntl = RREG32(IH_CNTL);
7101 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7102 
7103 	ih_cntl |= ENABLE_INTR;
7104 	ih_rb_cntl |= IH_RB_ENABLE;
7105 	WREG32(IH_CNTL, ih_cntl);
7106 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7107 	rdev->ih.enabled = true;
7108 }
7109 
7110 /**
7111  * cik_disable_interrupts - Disable the interrupt ring buffer
7112  *
7113  * @rdev: radeon_device pointer
7114  *
7115  * Disable the interrupt ring buffer (CIK).
7116  */
7117 static void cik_disable_interrupts(struct radeon_device *rdev)
7118 {
7119 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7120 	u32 ih_cntl = RREG32(IH_CNTL);
7121 
7122 	ih_rb_cntl &= ~IH_RB_ENABLE;
7123 	ih_cntl &= ~ENABLE_INTR;
7124 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7125 	WREG32(IH_CNTL, ih_cntl);
7126 	/* set rptr, wptr to 0 */
7127 	WREG32(IH_RB_RPTR, 0);
7128 	WREG32(IH_RB_WPTR, 0);
7129 	rdev->ih.enabled = false;
7130 	rdev->ih.rptr = 0;
7131 }
7132 
7133 /**
7134  * cik_disable_interrupt_state - Disable all interrupt sources
7135  *
7136  * @rdev: radeon_device pointer
7137  *
7138  * Clear all interrupt enable bits used by the driver (CIK).
7139  */
7140 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7141 {
7142 	u32 tmp;
7143 
7144 	/* gfx ring */
7145 	tmp = RREG32(CP_INT_CNTL_RING0) &
7146 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7147 	WREG32(CP_INT_CNTL_RING0, tmp);
7148 	/* sdma */
7149 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7150 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7151 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7152 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7153 	/* compute queues */
7154 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7155 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7156 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7157 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7158 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7159 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7160 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7161 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7162 	/* grbm */
7163 	WREG32(GRBM_INT_CNTL, 0);
7164 	/* vline/vblank, etc. */
7165 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7166 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7167 	if (rdev->num_crtc >= 4) {
7168 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7169 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7170 	}
7171 	if (rdev->num_crtc >= 6) {
7172 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7173 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7174 	}
7175 	/* pflip */
7176 	if (rdev->num_crtc >= 2) {
7177 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7178 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7179 	}
7180 	if (rdev->num_crtc >= 4) {
7181 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7182 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7183 	}
7184 	if (rdev->num_crtc >= 6) {
7185 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7186 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7187 	}
7188 
7189 	/* dac hotplug */
7190 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7191 
7192 	/* digital hotplug */
7193 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7194 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7195 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7196 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7197 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7198 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7199 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7200 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7201 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7202 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7203 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7204 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7205 
7206 }
7207 
7208 /**
7209  * cik_irq_init - init and enable the interrupt ring
7210  *
7211  * @rdev: radeon_device pointer
7212  *
7213  * Allocate a ring buffer for the interrupt controller,
7214  * enable the RLC, disable interrupts, enable the IH
7215  * ring buffer and enable it (CIK).
7216  * Called at device load and reume.
7217  * Returns 0 for success, errors for failure.
7218  */
7219 static int cik_irq_init(struct radeon_device *rdev)
7220 {
7221 	int ret = 0;
7222 	int rb_bufsz;
7223 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7224 
7225 	/* allocate ring */
7226 	ret = r600_ih_ring_alloc(rdev);
7227 	if (ret)
7228 		return ret;
7229 
7230 	/* disable irqs */
7231 	cik_disable_interrupts(rdev);
7232 
7233 	/* init rlc */
7234 	ret = cik_rlc_resume(rdev);
7235 	if (ret) {
7236 		r600_ih_ring_fini(rdev);
7237 		return ret;
7238 	}
7239 
7240 	/* setup interrupt control */
7241 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7242 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7243 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7244 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7245 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7246 	 */
7247 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7248 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7249 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7250 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7251 
7252 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7253 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7254 
7255 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7256 		      IH_WPTR_OVERFLOW_CLEAR |
7257 		      (rb_bufsz << 1));
7258 
7259 	if (rdev->wb.enabled)
7260 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7261 
7262 	/* set the writeback address whether it's enabled or not */
7263 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7264 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7265 
7266 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7267 
7268 	/* set rptr, wptr to 0 */
7269 	WREG32(IH_RB_RPTR, 0);
7270 	WREG32(IH_RB_WPTR, 0);
7271 
7272 	/* Default settings for IH_CNTL (disabled at first) */
7273 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7274 	/* RPTR_REARM only works if msi's are enabled */
7275 	if (rdev->msi_enabled)
7276 		ih_cntl |= RPTR_REARM;
7277 	WREG32(IH_CNTL, ih_cntl);
7278 
7279 	/* force the active interrupt state to all disabled */
7280 	cik_disable_interrupt_state(rdev);
7281 
7282 	pci_enable_busmaster(rdev->pdev->dev.bsddev);
7283 
7284 	/* enable irqs */
7285 	cik_enable_interrupts(rdev);
7286 
7287 	return ret;
7288 }
7289 
7290 /**
7291  * cik_irq_set - enable/disable interrupt sources
7292  *
7293  * @rdev: radeon_device pointer
7294  *
7295  * Enable interrupt sources on the GPU (vblanks, hpd,
7296  * etc.) (CIK).
7297  * Returns 0 for success, errors for failure.
7298  */
7299 int cik_irq_set(struct radeon_device *rdev)
7300 {
7301 	u32 cp_int_cntl;
7302 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7303 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7304 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7305 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7306 	u32 grbm_int_cntl = 0;
7307 	u32 dma_cntl, dma_cntl1;
7308 	u32 thermal_int;
7309 
7310 	if (!rdev->irq.installed) {
7311 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7312 		return -EINVAL;
7313 	}
7314 	/* don't enable anything if the ih is disabled */
7315 	if (!rdev->ih.enabled) {
7316 		cik_disable_interrupts(rdev);
7317 		/* force the active interrupt state to all disabled */
7318 		cik_disable_interrupt_state(rdev);
7319 		return 0;
7320 	}
7321 
7322 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7323 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7324 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7325 
7326 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
7327 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
7328 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
7329 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
7330 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
7331 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
7332 
7333 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7334 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7335 
7336 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7337 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7338 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7339 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7340 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7341 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7342 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7343 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7344 
7345 	if (rdev->flags & RADEON_IS_IGP)
7346 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
7347 			~(THERM_INTH_MASK | THERM_INTL_MASK);
7348 	else
7349 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
7350 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
7351 
7352 	/* enable CP interrupts on all rings */
7353 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7354 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7355 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7356 	}
7357 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7358 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7359 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7360 		if (ring->me == 1) {
7361 			switch (ring->pipe) {
7362 			case 0:
7363 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7364 				break;
7365 			case 1:
7366 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7367 				break;
7368 			case 2:
7369 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7370 				break;
7371 			case 3:
7372 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7373 				break;
7374 			default:
7375 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7376 				break;
7377 			}
7378 		} else if (ring->me == 2) {
7379 			switch (ring->pipe) {
7380 			case 0:
7381 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7382 				break;
7383 			case 1:
7384 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7385 				break;
7386 			case 2:
7387 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7388 				break;
7389 			case 3:
7390 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7391 				break;
7392 			default:
7393 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7394 				break;
7395 			}
7396 		} else {
7397 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7398 		}
7399 	}
7400 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7401 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7402 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7403 		if (ring->me == 1) {
7404 			switch (ring->pipe) {
7405 			case 0:
7406 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7407 				break;
7408 			case 1:
7409 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7410 				break;
7411 			case 2:
7412 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7413 				break;
7414 			case 3:
7415 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7416 				break;
7417 			default:
7418 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7419 				break;
7420 			}
7421 		} else if (ring->me == 2) {
7422 			switch (ring->pipe) {
7423 			case 0:
7424 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7425 				break;
7426 			case 1:
7427 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7428 				break;
7429 			case 2:
7430 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7431 				break;
7432 			case 3:
7433 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7434 				break;
7435 			default:
7436 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7437 				break;
7438 			}
7439 		} else {
7440 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7441 		}
7442 	}
7443 
7444 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7445 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7446 		dma_cntl |= TRAP_ENABLE;
7447 	}
7448 
7449 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7450 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7451 		dma_cntl1 |= TRAP_ENABLE;
7452 	}
7453 
7454 	if (rdev->irq.crtc_vblank_int[0] ||
7455 	    atomic_read(&rdev->irq.pflip[0])) {
7456 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7457 		crtc1 |= VBLANK_INTERRUPT_MASK;
7458 	}
7459 	if (rdev->irq.crtc_vblank_int[1] ||
7460 	    atomic_read(&rdev->irq.pflip[1])) {
7461 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7462 		crtc2 |= VBLANK_INTERRUPT_MASK;
7463 	}
7464 	if (rdev->irq.crtc_vblank_int[2] ||
7465 	    atomic_read(&rdev->irq.pflip[2])) {
7466 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7467 		crtc3 |= VBLANK_INTERRUPT_MASK;
7468 	}
7469 	if (rdev->irq.crtc_vblank_int[3] ||
7470 	    atomic_read(&rdev->irq.pflip[3])) {
7471 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7472 		crtc4 |= VBLANK_INTERRUPT_MASK;
7473 	}
7474 	if (rdev->irq.crtc_vblank_int[4] ||
7475 	    atomic_read(&rdev->irq.pflip[4])) {
7476 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7477 		crtc5 |= VBLANK_INTERRUPT_MASK;
7478 	}
7479 	if (rdev->irq.crtc_vblank_int[5] ||
7480 	    atomic_read(&rdev->irq.pflip[5])) {
7481 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7482 		crtc6 |= VBLANK_INTERRUPT_MASK;
7483 	}
7484 	if (rdev->irq.hpd[0]) {
7485 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7486 		hpd1 |= DC_HPDx_INT_EN;
7487 	}
7488 	if (rdev->irq.hpd[1]) {
7489 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7490 		hpd2 |= DC_HPDx_INT_EN;
7491 	}
7492 	if (rdev->irq.hpd[2]) {
7493 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7494 		hpd3 |= DC_HPDx_INT_EN;
7495 	}
7496 	if (rdev->irq.hpd[3]) {
7497 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7498 		hpd4 |= DC_HPDx_INT_EN;
7499 	}
7500 	if (rdev->irq.hpd[4]) {
7501 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7502 		hpd5 |= DC_HPDx_INT_EN;
7503 	}
7504 	if (rdev->irq.hpd[5]) {
7505 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7506 		hpd6 |= DC_HPDx_INT_EN;
7507 	}
7508 
7509 	if (rdev->irq.dpm_thermal) {
7510 		DRM_DEBUG("dpm thermal\n");
7511 		if (rdev->flags & RADEON_IS_IGP)
7512 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7513 		else
7514 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7515 	}
7516 
7517 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7518 
7519 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7520 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7521 
7522 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7523 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7524 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7525 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7526 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7527 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7528 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7529 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7530 
7531 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7532 
7533 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7534 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7535 	if (rdev->num_crtc >= 4) {
7536 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7537 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7538 	}
7539 	if (rdev->num_crtc >= 6) {
7540 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7541 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7542 	}
7543 
7544 	if (rdev->num_crtc >= 2) {
7545 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7546 		       GRPH_PFLIP_INT_MASK);
7547 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7548 		       GRPH_PFLIP_INT_MASK);
7549 	}
7550 	if (rdev->num_crtc >= 4) {
7551 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7552 		       GRPH_PFLIP_INT_MASK);
7553 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7554 		       GRPH_PFLIP_INT_MASK);
7555 	}
7556 	if (rdev->num_crtc >= 6) {
7557 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7558 		       GRPH_PFLIP_INT_MASK);
7559 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7560 		       GRPH_PFLIP_INT_MASK);
7561 	}
7562 
7563 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7564 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7565 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7566 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7567 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7568 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7569 
7570 	if (rdev->flags & RADEON_IS_IGP)
7571 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7572 	else
7573 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7574 
7575 	return 0;
7576 }
7577 
/**
 * cik_irq_ack - ack interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are software
 * generated and do not require an explicit ack.
 */
7587 static inline void cik_irq_ack(struct radeon_device *rdev)
7588 {
7589 	u32 tmp;
7590 
7591 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7592 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7593 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7594 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7595 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7596 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7597 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7598 
7599 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7600 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7601 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7602 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7603 	if (rdev->num_crtc >= 4) {
7604 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7605 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7606 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7607 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7608 	}
7609 	if (rdev->num_crtc >= 6) {
7610 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7611 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7612 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7613 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7614 	}
7615 
7616 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7617 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7618 		       GRPH_PFLIP_INT_CLEAR);
7619 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7620 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7621 		       GRPH_PFLIP_INT_CLEAR);
7622 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7623 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7624 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7625 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7626 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7627 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7628 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7629 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7630 
7631 	if (rdev->num_crtc >= 4) {
7632 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7633 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7634 			       GRPH_PFLIP_INT_CLEAR);
7635 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7636 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7637 			       GRPH_PFLIP_INT_CLEAR);
7638 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7639 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7640 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7641 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7642 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7643 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7644 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7645 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7646 	}
7647 
7648 	if (rdev->num_crtc >= 6) {
7649 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7650 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7651 			       GRPH_PFLIP_INT_CLEAR);
7652 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7653 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7654 			       GRPH_PFLIP_INT_CLEAR);
7655 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7656 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7657 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7658 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7659 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7660 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7661 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7662 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7663 	}
7664 
7665 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7666 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7667 		tmp |= DC_HPDx_INT_ACK;
7668 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7669 	}
7670 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7671 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7672 		tmp |= DC_HPDx_INT_ACK;
7673 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7674 	}
7675 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7676 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7677 		tmp |= DC_HPDx_INT_ACK;
7678 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7679 	}
7680 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7681 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7682 		tmp |= DC_HPDx_INT_ACK;
7683 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7684 	}
7685 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7686 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7687 		tmp |= DC_HPDx_INT_ACK;
7688 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7689 	}
7690 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7691 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7692 		tmp |= DC_HPDx_INT_ACK;
7693 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7694 	}
7695 }
7696 
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK): call cik_disable_interrupts(),
 * wait briefly, ack what is still flagged via cik_irq_ack() and finish
 * with cik_disable_interrupt_state().
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);

	/* wait 1 ms, then acknowledge anything still flagged */
	mdelay(1);
	cik_irq_ack(rdev);

	cik_disable_interrupt_state(rdev);
}
7712 
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* interrupts go down first, then the RLC is stopped */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7726 
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Runs the suspend path (interrupts disabled, RLC stopped) and then
 * releases the IH ring buffer through r600_ih_ring_fini() (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw before the IH ring goes away */
	cik_irq_suspend(rdev);

	r600_ih_ring_fini(rdev);
}
7741 
7742 /**
7743  * cik_get_ih_wptr - get the IH ring buffer wptr
7744  *
7745  * @rdev: radeon_device pointer
7746  *
7747  * Get the IH ring buffer wptr from either the register
7748  * or the writeback memory buffer (CIK).  Also check for
7749  * ring buffer overflow and deal with it.
7750  * Used by cik_irq_process().
7751  * Returns the value of the wptr.
7752  */
7753 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7754 {
7755 	u32 wptr, tmp;
7756 
7757 	if (rdev->wb.enabled)
7758 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7759 	else
7760 		wptr = RREG32(IH_RB_WPTR);
7761 
7762 	if (wptr & RB_OVERFLOW) {
7763 		wptr &= ~RB_OVERFLOW;
7764 		/* When a ring buffer overflow happen start parsing interrupt
7765 		 * from the last not overwritten vector (wptr + 16). Hopefully
7766 		 * this should allow us to catchup.
7767 		 */
7768 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7769 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7770 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7771 		tmp = RREG32(IH_RB_CNTL);
7772 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7773 		WREG32(IH_RB_CNTL, tmp);
7774 	}
7775 	return (wptr & rdev->ih.ptr_mask);
7776 }
7777 
7778 /*        CIK IV Ring
7779  * Each IV ring entry is 128 bits:
7780  * [7:0]    - interrupt source id
7781  * [31:8]   - reserved
7782  * [59:32]  - interrupt source data
7783  * [63:60]  - reserved
7784  * [71:64]  - RINGID
7785  *            CP:
7786  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7787  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7788  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7789  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7790  *            PIPE_ID - ME0 0=3D
7791  *                    - ME1&2 compute dispatcher (4 pipes each)
7792  *            SDMA:
7793  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7794  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7795  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7796  * [79:72]  - VMID
7797  * [95:80]  - PASID
7798  * [127:96] - reserved
7799  */
7800 /**
7801  * cik_irq_process - interrupt handler
7802  *
7803  * @rdev: radeon_device pointer
7804  *
7805  * Interrupt hander (CIK).  Walk the IH ring,
7806  * ack interrupts and schedule work to handle
7807  * interrupt events.
7808  * Returns irq process return code.
7809  */
7810 irqreturn_t cik_irq_process(struct radeon_device *rdev)
7811 {
7812 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7813 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7814 	u32 wptr;
7815 	u32 rptr;
7816 	u32 src_id, src_data, ring_id;
7817 	u8 me_id, pipe_id, queue_id;
7818 	u32 ring_index;
7819 	bool queue_hotplug = false;
7820 	bool queue_reset = false;
7821 	u32 addr, status, mc_client;
7822 	bool queue_thermal = false;
7823 
7824 	if (!rdev->ih.enabled || rdev->shutdown)
7825 		return IRQ_NONE;
7826 
7827 	wptr = cik_get_ih_wptr(rdev);
7828 
7829 restart_ih:
7830 	/* is somebody else already processing irqs? */
7831 	if (atomic_xchg(&rdev->ih.lock, 1))
7832 		return IRQ_NONE;
7833 
7834 	rptr = rdev->ih.rptr;
7835 	DRM_DEBUG_VBLANK("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7836 
7837 	/* Order reading of wptr vs. reading of IH ring data */
7838 	rmb();
7839 
7840 	/* display interrupts */
7841 	cik_irq_ack(rdev);
7842 
7843 	while (rptr != wptr) {
7844 		/* wptr/rptr are in bytes! */
7845 		ring_index = rptr / 4;
7846 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7847 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7848 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7849 
7850 		switch (src_id) {
7851 		case 1: /* D1 vblank/vline */
7852 			switch (src_data) {
7853 			case 0: /* D1 vblank */
7854 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7855 					if (rdev->irq.crtc_vblank_int[0]) {
7856 						drm_handle_vblank(rdev->ddev, 0);
7857 						rdev->pm.vblank_sync = true;
7858 						wake_up(&rdev->irq.vblank_queue);
7859 					}
7860 					if (atomic_read(&rdev->irq.pflip[0]))
7861 						radeon_crtc_handle_vblank(rdev, 0);
7862 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7863 					DRM_DEBUG_VBLANK("IH: D1 vblank\n");
7864 				}
7865 				break;
7866 			case 1: /* D1 vline */
7867 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7868 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7869 					DRM_DEBUG_VBLANK("IH: D1 vline\n");
7870 				}
7871 				break;
7872 			default:
7873 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7874 				break;
7875 			}
7876 			break;
7877 		case 2: /* D2 vblank/vline */
7878 			switch (src_data) {
7879 			case 0: /* D2 vblank */
7880 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7881 					if (rdev->irq.crtc_vblank_int[1]) {
7882 						drm_handle_vblank(rdev->ddev, 1);
7883 						rdev->pm.vblank_sync = true;
7884 						wake_up(&rdev->irq.vblank_queue);
7885 					}
7886 					if (atomic_read(&rdev->irq.pflip[1]))
7887 						radeon_crtc_handle_vblank(rdev, 1);
7888 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7889 					DRM_DEBUG_VBLANK("IH: D2 vblank\n");
7890 				}
7891 				break;
7892 			case 1: /* D2 vline */
7893 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7894 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7895 					DRM_DEBUG_VBLANK("IH: D2 vline\n");
7896 				}
7897 				break;
7898 			default:
7899 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7900 				break;
7901 			}
7902 			break;
7903 		case 3: /* D3 vblank/vline */
7904 			switch (src_data) {
7905 			case 0: /* D3 vblank */
7906 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7907 					if (rdev->irq.crtc_vblank_int[2]) {
7908 						drm_handle_vblank(rdev->ddev, 2);
7909 						rdev->pm.vblank_sync = true;
7910 						wake_up(&rdev->irq.vblank_queue);
7911 					}
7912 					if (atomic_read(&rdev->irq.pflip[2]))
7913 						radeon_crtc_handle_vblank(rdev, 2);
7914 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7915 					DRM_DEBUG_VBLANK("IH: D3 vblank\n");
7916 				}
7917 				break;
7918 			case 1: /* D3 vline */
7919 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7920 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7921 					DRM_DEBUG_VBLANK("IH: D3 vline\n");
7922 				}
7923 				break;
7924 			default:
7925 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7926 				break;
7927 			}
7928 			break;
7929 		case 4: /* D4 vblank/vline */
7930 			switch (src_data) {
7931 			case 0: /* D4 vblank */
7932 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7933 					if (rdev->irq.crtc_vblank_int[3]) {
7934 						drm_handle_vblank(rdev->ddev, 3);
7935 						rdev->pm.vblank_sync = true;
7936 						wake_up(&rdev->irq.vblank_queue);
7937 					}
7938 					if (atomic_read(&rdev->irq.pflip[3]))
7939 						radeon_crtc_handle_vblank(rdev, 3);
7940 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7941 					DRM_DEBUG_VBLANK("IH: D4 vblank\n");
7942 				}
7943 				break;
7944 			case 1: /* D4 vline */
7945 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7946 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7947 					DRM_DEBUG_VBLANK("IH: D4 vline\n");
7948 				}
7949 				break;
7950 			default:
7951 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7952 				break;
7953 			}
7954 			break;
7955 		case 5: /* D5 vblank/vline */
7956 			switch (src_data) {
7957 			case 0: /* D5 vblank */
7958 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7959 					if (rdev->irq.crtc_vblank_int[4]) {
7960 						drm_handle_vblank(rdev->ddev, 4);
7961 						rdev->pm.vblank_sync = true;
7962 						wake_up(&rdev->irq.vblank_queue);
7963 					}
7964 					if (atomic_read(&rdev->irq.pflip[4]))
7965 						radeon_crtc_handle_vblank(rdev, 4);
7966 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7967 					DRM_DEBUG_VBLANK("IH: D5 vblank\n");
7968 				}
7969 				break;
7970 			case 1: /* D5 vline */
7971 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7972 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7973 					DRM_DEBUG_VBLANK("IH: D5 vline\n");
7974 				}
7975 				break;
7976 			default:
7977 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7978 				break;
7979 			}
7980 			break;
7981 		case 6: /* D6 vblank/vline */
7982 			switch (src_data) {
7983 			case 0: /* D6 vblank */
7984 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7985 					if (rdev->irq.crtc_vblank_int[5]) {
7986 						drm_handle_vblank(rdev->ddev, 5);
7987 						rdev->pm.vblank_sync = true;
7988 						wake_up(&rdev->irq.vblank_queue);
7989 					}
7990 					if (atomic_read(&rdev->irq.pflip[5]))
7991 						radeon_crtc_handle_vblank(rdev, 5);
7992 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7993 					DRM_DEBUG_VBLANK("IH: D6 vblank\n");
7994 				}
7995 				break;
7996 			case 1: /* D6 vline */
7997 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7998 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7999 					DRM_DEBUG_VBLANK("IH: D6 vline\n");
8000 				}
8001 				break;
8002 			default:
8003 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8004 				break;
8005 			}
8006 			break;
8007 		case 8: /* D1 page flip */
8008 		case 10: /* D2 page flip */
8009 		case 12: /* D3 page flip */
8010 		case 14: /* D4 page flip */
8011 		case 16: /* D5 page flip */
8012 		case 18: /* D6 page flip */
8013 			DRM_DEBUG_VBLANK("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8014 			if (radeon_use_pflipirq > 0)
8015 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8016 			break;
8017 		case 42: /* HPD hotplug */
8018 			switch (src_data) {
8019 			case 0:
8020 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
8021 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8022 					queue_hotplug = true;
8023 					DRM_DEBUG("IH: HPD1\n");
8024 				}
8025 				break;
8026 			case 1:
8027 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
8028 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8029 					queue_hotplug = true;
8030 					DRM_DEBUG("IH: HPD2\n");
8031 				}
8032 				break;
8033 			case 2:
8034 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8035 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8036 					queue_hotplug = true;
8037 					DRM_DEBUG("IH: HPD3\n");
8038 				}
8039 				break;
8040 			case 3:
8041 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8042 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8043 					queue_hotplug = true;
8044 					DRM_DEBUG("IH: HPD4\n");
8045 				}
8046 				break;
8047 			case 4:
8048 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8049 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8050 					queue_hotplug = true;
8051 					DRM_DEBUG("IH: HPD5\n");
8052 				}
8053 				break;
8054 			case 5:
8055 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8056 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8057 					queue_hotplug = true;
8058 					DRM_DEBUG("IH: HPD6\n");
8059 				}
8060 				break;
8061 			default:
8062 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8063 				break;
8064 			}
8065 			break;
8066 		case 124: /* UVD */
8067 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8068 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8069 			break;
8070 		case 146:
8071 		case 147:
8072 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8073 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8074 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8075 			/* reset addr and status */
8076 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8077 			if (addr == 0x0 && status == 0x0)
8078 				break;
8079 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8080 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8081 				addr);
8082 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8083 				status);
8084 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8085 			break;
8086 		case 167: /* VCE */
8087 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8088 			switch (src_data) {
8089 			case 0:
8090 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8091 				break;
8092 			case 1:
8093 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8094 				break;
8095 			default:
8096 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8097 				break;
8098 			}
8099 			break;
8100 		case 176: /* GFX RB CP_INT */
8101 		case 177: /* GFX IB CP_INT */
8102 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8103 			break;
8104 		case 181: /* CP EOP event */
8105 			DRM_DEBUG("IH: CP EOP\n");
8106 			/* XXX check the bitfield order! */
8107 			me_id = (ring_id & 0x60) >> 5;
8108 			pipe_id = (ring_id & 0x18) >> 3;
8109 			queue_id = (ring_id & 0x7) >> 0;
8110 			switch (me_id) {
8111 			case 0:
8112 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8113 				break;
8114 			case 1:
8115 			case 2:
8116 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8117 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8118 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8119 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8120 				break;
8121 			}
8122 			break;
8123 		case 184: /* CP Privileged reg access */
8124 			DRM_ERROR("Illegal register access in command stream\n");
8125 			/* XXX check the bitfield order! */
8126 			me_id = (ring_id & 0x60) >> 5;
8127 			pipe_id = (ring_id & 0x18) >> 3;
8128 			queue_id = (ring_id & 0x7) >> 0;
8129 			switch (me_id) {
8130 			case 0:
8131 				/* This results in a full GPU reset, but all we need to do is soft
8132 				 * reset the CP for gfx
8133 				 */
8134 				queue_reset = true;
8135 				break;
8136 			case 1:
8137 				/* XXX compute */
8138 				queue_reset = true;
8139 				break;
8140 			case 2:
8141 				/* XXX compute */
8142 				queue_reset = true;
8143 				break;
8144 			}
8145 			break;
8146 		case 185: /* CP Privileged inst */
8147 			DRM_ERROR("Illegal instruction in command stream\n");
8148 			/* XXX check the bitfield order! */
8149 			me_id = (ring_id & 0x60) >> 5;
8150 			pipe_id = (ring_id & 0x18) >> 3;
8151 			queue_id = (ring_id & 0x7) >> 0;
8152 			switch (me_id) {
8153 			case 0:
8154 				/* This results in a full GPU reset, but all we need to do is soft
8155 				 * reset the CP for gfx
8156 				 */
8157 				queue_reset = true;
8158 				break;
8159 			case 1:
8160 				/* XXX compute */
8161 				queue_reset = true;
8162 				break;
8163 			case 2:
8164 				/* XXX compute */
8165 				queue_reset = true;
8166 				break;
8167 			}
8168 			break;
8169 		case 224: /* SDMA trap event */
8170 			/* XXX check the bitfield order! */
8171 			me_id = (ring_id & 0x3) >> 0;
8172 			queue_id = (ring_id & 0xc) >> 2;
8173 			DRM_DEBUG("IH: SDMA trap\n");
8174 			switch (me_id) {
8175 			case 0:
8176 				switch (queue_id) {
8177 				case 0:
8178 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8179 					break;
8180 				case 1:
8181 					/* XXX compute */
8182 					break;
8183 				case 2:
8184 					/* XXX compute */
8185 					break;
8186 				}
8187 				break;
8188 			case 1:
8189 				switch (queue_id) {
8190 				case 0:
8191 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8192 					break;
8193 				case 1:
8194 					/* XXX compute */
8195 					break;
8196 				case 2:
8197 					/* XXX compute */
8198 					break;
8199 				}
8200 				break;
8201 			}
8202 			break;
8203 		case 230: /* thermal low to high */
8204 			DRM_DEBUG("IH: thermal low to high\n");
8205 			rdev->pm.dpm.thermal.high_to_low = false;
8206 			queue_thermal = true;
8207 			break;
8208 		case 231: /* thermal high to low */
8209 			DRM_DEBUG("IH: thermal high to low\n");
8210 			rdev->pm.dpm.thermal.high_to_low = true;
8211 			queue_thermal = true;
8212 			break;
8213 		case 233: /* GUI IDLE */
8214 			DRM_DEBUG("IH: GUI idle\n");
8215 			break;
8216 		case 241: /* SDMA Privileged inst */
8217 		case 247: /* SDMA Privileged inst */
8218 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8219 			/* XXX check the bitfield order! */
8220 			me_id = (ring_id & 0x3) >> 0;
8221 			queue_id = (ring_id & 0xc) >> 2;
8222 			switch (me_id) {
8223 			case 0:
8224 				switch (queue_id) {
8225 				case 0:
8226 					queue_reset = true;
8227 					break;
8228 				case 1:
8229 					/* XXX compute */
8230 					queue_reset = true;
8231 					break;
8232 				case 2:
8233 					/* XXX compute */
8234 					queue_reset = true;
8235 					break;
8236 				}
8237 				break;
8238 			case 1:
8239 				switch (queue_id) {
8240 				case 0:
8241 					queue_reset = true;
8242 					break;
8243 				case 1:
8244 					/* XXX compute */
8245 					queue_reset = true;
8246 					break;
8247 				case 2:
8248 					/* XXX compute */
8249 					queue_reset = true;
8250 					break;
8251 				}
8252 				break;
8253 			}
8254 			break;
8255 		default:
8256 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8257 			break;
8258 		}
8259 
8260 		/* wptr/rptr are in bytes! */
8261 		rptr += 16;
8262 		rptr &= rdev->ih.ptr_mask;
8263 		WREG32(IH_RB_RPTR, rptr);
8264 	}
8265 	if (queue_hotplug)
8266 		taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
8267 	if (queue_reset) {
8268 		rdev->needs_reset = true;
8269 		wake_up_all(&rdev->fence_queue);
8270 	}
8271 	if (queue_thermal)
8272 		taskqueue_enqueue(rdev->tq, &rdev->pm.dpm.thermal.work);
8273 	rdev->ih.rptr = rptr;
8274 	atomic_set(&rdev->ih.lock, 0);
8275 
8276 	/* make sure wptr hasn't changed while processing */
8277 	wptr = cik_get_ih_wptr(rdev);
8278 	if (wptr != rptr)
8279 		goto restart_ih;
8280 
8281 	return IRQ_HANDLED;
8282 }
8283 
8284 /*
8285  * startup/shutdown callbacks
8286  */
8287 /**
8288  * cik_startup - program the asic to a functional state
8289  *
8290  * @rdev: radeon_device pointer
8291  *
8292  * Programs the asic to a functional state (CIK).
8293  * Called by cik_init() and cik_resume().
8294  * Returns 0 for success, error for failure.
8295  */
8296 static int cik_startup(struct radeon_device *rdev)
8297 {
8298 	struct radeon_ring *ring;
8299 	u32 nop;
8300 	int r;
8301 
8302 	/* enable pcie gen2/3 link */
8303 	cik_pcie_gen3_enable(rdev);
8304 	/* enable aspm */
8305 	cik_program_aspm(rdev);
8306 
8307 	/* scratch needs to be initialized before MC */
8308 	r = r600_vram_scratch_init(rdev);
8309 	if (r)
8310 		return r;
8311 
8312 	cik_mc_program(rdev);
8313 
8314 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8315 		r = ci_mc_load_microcode(rdev);
8316 		if (r) {
8317 			DRM_ERROR("Failed to load MC firmware!\n");
8318 			return r;
8319 		}
8320 	}
8321 
8322 	r = cik_pcie_gart_enable(rdev);
8323 	if (r)
8324 		return r;
8325 	cik_gpu_init(rdev);
8326 
8327 	/* allocate rlc buffers */
8328 	if (rdev->flags & RADEON_IS_IGP) {
8329 		if (rdev->family == CHIP_KAVERI) {
8330 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8331 			rdev->rlc.reg_list_size =
8332 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8333 		} else {
8334 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8335 			rdev->rlc.reg_list_size =
8336 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8337 		}
8338 	}
8339 	rdev->rlc.cs_data = ci_cs_data;
8340 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8341 	r = sumo_rlc_init(rdev);
8342 	if (r) {
8343 		DRM_ERROR("Failed to init rlc BOs!\n");
8344 		return r;
8345 	}
8346 
8347 	/* allocate wb buffer */
8348 	r = radeon_wb_init(rdev);
8349 	if (r)
8350 		return r;
8351 
8352 	/* allocate mec buffers */
8353 	r = cik_mec_init(rdev);
8354 	if (r) {
8355 		DRM_ERROR("Failed to init MEC BOs!\n");
8356 		return r;
8357 	}
8358 
8359 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8360 	if (r) {
8361 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8362 		return r;
8363 	}
8364 
8365 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8366 	if (r) {
8367 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8368 		return r;
8369 	}
8370 
8371 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8372 	if (r) {
8373 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8374 		return r;
8375 	}
8376 
8377 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8378 	if (r) {
8379 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8380 		return r;
8381 	}
8382 
8383 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8384 	if (r) {
8385 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8386 		return r;
8387 	}
8388 
8389 	r = radeon_uvd_resume(rdev);
8390 	if (!r) {
8391 		r = uvd_v4_2_resume(rdev);
8392 		if (!r) {
8393 			r = radeon_fence_driver_start_ring(rdev,
8394 							   R600_RING_TYPE_UVD_INDEX);
8395 			if (r)
8396 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8397 		}
8398 	}
8399 	if (r)
8400 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8401 
8402 	r = radeon_vce_resume(rdev);
8403 	if (!r) {
8404 		r = vce_v2_0_resume(rdev);
8405 		if (!r)
8406 			r = radeon_fence_driver_start_ring(rdev,
8407 							   TN_RING_TYPE_VCE1_INDEX);
8408 		if (!r)
8409 			r = radeon_fence_driver_start_ring(rdev,
8410 							   TN_RING_TYPE_VCE2_INDEX);
8411 	}
8412 	if (r) {
8413 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8414 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8415 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8416 	}
8417 
8418 	/* Enable IRQ */
8419 	if (!rdev->irq.installed) {
8420 		r = radeon_irq_kms_init(rdev);
8421 		if (r)
8422 			return r;
8423 	}
8424 
8425 	r = cik_irq_init(rdev);
8426 	if (r) {
8427 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8428 		radeon_irq_kms_fini(rdev);
8429 		return r;
8430 	}
8431 	cik_irq_set(rdev);
8432 
8433 	if (rdev->family == CHIP_HAWAII) {
8434 		if (rdev->new_fw)
8435 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8436 		else
8437 			nop = RADEON_CP_PACKET2;
8438 	} else {
8439 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8440 	}
8441 
8442 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8443 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8444 			     nop);
8445 	if (r)
8446 		return r;
8447 
8448 	/* set up the compute queues */
8449 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8450 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8451 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8452 			     nop);
8453 	if (r)
8454 		return r;
8455 	ring->me = 1; /* first MEC */
8456 	ring->pipe = 0; /* first pipe */
8457 	ring->queue = 0; /* first queue */
8458 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8459 
8460 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8461 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8462 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8463 			     nop);
8464 	if (r)
8465 		return r;
8466 	/* dGPU only have 1 MEC */
8467 	ring->me = 1; /* first MEC */
8468 	ring->pipe = 0; /* first pipe */
8469 	ring->queue = 1; /* second queue */
8470 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8471 
8472 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8473 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8474 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8475 	if (r)
8476 		return r;
8477 
8478 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8479 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8480 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8481 	if (r)
8482 		return r;
8483 
8484 	r = cik_cp_resume(rdev);
8485 	if (r)
8486 		return r;
8487 
8488 	r = cik_sdma_resume(rdev);
8489 	if (r)
8490 		return r;
8491 
8492 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8493 	if (ring->ring_size) {
8494 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8495 				     RADEON_CP_PACKET2);
8496 		if (!r)
8497 			r = uvd_v1_0_init(rdev);
8498 		if (r)
8499 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8500 	}
8501 
8502 	r = -ENOENT;
8503 
8504 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8505 	if (ring->ring_size)
8506 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8507 				     VCE_CMD_NO_OP);
8508 
8509 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8510 	if (ring->ring_size)
8511 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8512 				     VCE_CMD_NO_OP);
8513 
8514 	if (!r)
8515 		r = vce_v1_0_init(rdev);
8516 	else if (r != -ENOENT)
8517 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8518 
8519 	r = radeon_ib_pool_init(rdev);
8520 	if (r) {
8521 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8522 		return r;
8523 	}
8524 
8525 	r = radeon_vm_manager_init(rdev);
8526 	if (r) {
8527 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8528 		return r;
8529 	}
8530 
8531 	r = dce6_audio_init(rdev);
8532 	if (r)
8533 		return r;
8534 
8535 	return 0;
8536 }
8537 
8538 /**
8539  * cik_resume - resume the asic to a functional state
8540  *
8541  * @rdev: radeon_device pointer
8542  *
8543  * Programs the asic to a functional state (CIK).
8544  * Called at resume.
8545  * Returns 0 for success, error for failure.
8546  */
8547 int cik_resume(struct radeon_device *rdev)
8548 {
8549 	int r;
8550 
8551 	/* post card */
8552 	atom_asic_init(rdev->mode_info.atom_context);
8553 
8554 	/* init golden registers */
8555 	cik_init_golden_registers(rdev);
8556 
8557 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8558 		radeon_pm_resume(rdev);
8559 
8560 	rdev->accel_working = true;
8561 	r = cik_startup(rdev);
8562 	if (r) {
8563 		DRM_ERROR("cik startup failed on resume\n");
8564 		rdev->accel_working = false;
8565 		return r;
8566 	}
8567 
8568 	return r;
8569 
8570 }
8571 
/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * The individual blocks are shut down one after another in a fixed
 * order, ending with the PCIE GART; none of the calls made here
 * returns a status, so no error can be reported.
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* enable == false for both the CP and the SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* UVD is torn down before its state is suspended; VCE only suspends */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	/* NOTE(review): pg/cg presumably mean power/clock gating - confirm */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8598 
8599 /* Plan is to move initialization in that function and use
8600  * helper function so that radeon_device_init pretty much
8601  * do nothing more than calling asic specific function. This
8602  * should also allow to remove a bunch of callback function
8603  * like vram_info.
8604  */
8605 /**
8606  * cik_init - asic specific driver and hw init
8607  *
8608  * @rdev: radeon_device pointer
8609  *
8610  * Setup asic specific driver variables and program the hw
8611  * to a functional state (CIK).
8612  * Called at driver startup.
8613  * Returns 0 for success, errors for failure.
8614  */
8615 int cik_init(struct radeon_device *rdev)
8616 {
8617 	struct radeon_ring *ring;
8618 	int r;
8619 
8620 	/* Read BIOS */
8621 	if (!radeon_get_bios(rdev)) {
8622 		if (ASIC_IS_AVIVO(rdev))
8623 			return -EINVAL;
8624 	}
8625 	/* Must be an ATOMBIOS */
8626 	if (!rdev->is_atom_bios) {
8627 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8628 		return -EINVAL;
8629 	}
8630 	r = radeon_atombios_init(rdev);
8631 	if (r)
8632 		return r;
8633 
8634 	/* Post card if necessary */
8635 	if (!radeon_card_posted(rdev)) {
8636 		if (!rdev->bios) {
8637 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8638 			return -EINVAL;
8639 		}
8640 		DRM_INFO("GPU not posted. posting now...\n");
8641 		atom_asic_init(rdev->mode_info.atom_context);
8642 	}
8643 	/* init golden registers */
8644 	cik_init_golden_registers(rdev);
8645 	/* Initialize scratch registers */
8646 	cik_scratch_init(rdev);
8647 	/* Initialize surface registers */
8648 	radeon_surface_init(rdev);
8649 	/* Initialize clocks */
8650 	radeon_get_clock_info(rdev->ddev);
8651 
8652 	/* Fence driver */
8653 	r = radeon_fence_driver_init(rdev);
8654 	if (r)
8655 		return r;
8656 
8657 	/* initialize memory controller */
8658 	r = cik_mc_init(rdev);
8659 	if (r)
8660 		return r;
8661 	/* Memory manager */
8662 	r = radeon_bo_init(rdev);
8663 	if (r)
8664 		return r;
8665 
8666 	if (rdev->flags & RADEON_IS_IGP) {
8667 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8668 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8669 			r = cik_init_microcode(rdev);
8670 			if (r) {
8671 				DRM_ERROR("Failed to load firmware!\n");
8672 				return r;
8673 			}
8674 		}
8675 	} else {
8676 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8677 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8678 		    !rdev->mc_fw) {
8679 			r = cik_init_microcode(rdev);
8680 			if (r) {
8681 				DRM_ERROR("Failed to load firmware!\n");
8682 				return r;
8683 			}
8684 		}
8685 	}
8686 
8687 	/* Initialize power management */
8688 	radeon_pm_init(rdev);
8689 
8690 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8691 	ring->ring_obj = NULL;
8692 	r600_ring_init(rdev, ring, 1024 * 1024);
8693 
8694 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8695 	ring->ring_obj = NULL;
8696 	r600_ring_init(rdev, ring, 1024 * 1024);
8697 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8698 	if (r)
8699 		return r;
8700 
8701 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8702 	ring->ring_obj = NULL;
8703 	r600_ring_init(rdev, ring, 1024 * 1024);
8704 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8705 	if (r)
8706 		return r;
8707 
8708 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8709 	ring->ring_obj = NULL;
8710 	r600_ring_init(rdev, ring, 256 * 1024);
8711 
8712 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8713 	ring->ring_obj = NULL;
8714 	r600_ring_init(rdev, ring, 256 * 1024);
8715 
8716 	r = radeon_uvd_init(rdev);
8717 	if (!r) {
8718 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8719 		ring->ring_obj = NULL;
8720 		r600_ring_init(rdev, ring, 4096);
8721 	}
8722 
8723 	r = radeon_vce_init(rdev);
8724 	if (!r) {
8725 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8726 		ring->ring_obj = NULL;
8727 		r600_ring_init(rdev, ring, 4096);
8728 
8729 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8730 		ring->ring_obj = NULL;
8731 		r600_ring_init(rdev, ring, 4096);
8732 	}
8733 
8734 	rdev->ih.ring_obj = NULL;
8735 	r600_ih_ring_init(rdev, 64 * 1024);
8736 
8737 	r = r600_pcie_gart_init(rdev);
8738 	if (r)
8739 		return r;
8740 
8741 	rdev->accel_working = true;
8742 	r = cik_startup(rdev);
8743 	if (r) {
8744 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8745 		cik_cp_fini(rdev);
8746 		cik_sdma_fini(rdev);
8747 		cik_irq_fini(rdev);
8748 		sumo_rlc_fini(rdev);
8749 		cik_mec_fini(rdev);
8750 		radeon_wb_fini(rdev);
8751 		radeon_ib_pool_fini(rdev);
8752 		radeon_vm_manager_fini(rdev);
8753 		radeon_irq_kms_fini(rdev);
8754 		cik_pcie_gart_fini(rdev);
8755 		rdev->accel_working = false;
8756 	}
8757 
8758 	/* Don't start up if the MC ucode is missing.
8759 	 * The default clocks and voltages before the MC ucode
8760 	 * is loaded are not suffient for advanced operations.
8761 	 */
8762 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8763 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8764 		return -EINVAL;
8765 	}
8766 
8767 	return 0;
8768 }
8769 
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Runs the fini routine of every block in a fixed order and finally
 * frees the BIOS copy held in rdev->bios.
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	cik_fini_microcode(rdev);
	/* drop the BIOS image and clear the pointer so it cannot be reused */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8806 
8807 void dce8_program_fmt(struct drm_encoder *encoder)
8808 {
8809 	struct drm_device *dev = encoder->dev;
8810 	struct radeon_device *rdev = dev->dev_private;
8811 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8812 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8813 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8814 	int bpc = 0;
8815 	u32 tmp = 0;
8816 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8817 
8818 	if (connector) {
8819 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8820 		bpc = radeon_get_monitor_bpc(connector);
8821 		dither = radeon_connector->dither;
8822 	}
8823 
8824 	/* LVDS/eDP FMT is set up by atom */
8825 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8826 		return;
8827 
8828 	/* not needed for analog */
8829 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8830 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8831 		return;
8832 
8833 	if (bpc == 0)
8834 		return;
8835 
8836 	switch (bpc) {
8837 	case 6:
8838 		if (dither == RADEON_FMT_DITHER_ENABLE)
8839 			/* XXX sort out optimal dither settings */
8840 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8841 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8842 		else
8843 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8844 		break;
8845 	case 8:
8846 		if (dither == RADEON_FMT_DITHER_ENABLE)
8847 			/* XXX sort out optimal dither settings */
8848 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8849 				FMT_RGB_RANDOM_ENABLE |
8850 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8851 		else
8852 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8853 		break;
8854 	case 10:
8855 		if (dither == RADEON_FMT_DITHER_ENABLE)
8856 			/* XXX sort out optimal dither settings */
8857 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8858 				FMT_RGB_RANDOM_ENABLE |
8859 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8860 		else
8861 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8862 		break;
8863 	default:
8864 		/* not needed */
8865 		break;
8866 	}
8867 
8868 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8869 }
8870 
8871 /* display watermark setup */
8872 /**
8873  * dce8_line_buffer_adjust - Set up the line buffer
8874  *
8875  * @rdev: radeon_device pointer
8876  * @radeon_crtc: the selected display controller
8877  * @mode: the current display mode on the selected display
8878  * controller
8879  *
8880  * Setup up the line buffer allocation for
8881  * the selected display controller (CIK).
8882  * Returns the line buffer size in pixels.
8883  */
8884 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8885 				   struct radeon_crtc *radeon_crtc,
8886 				   struct drm_display_mode *mode)
8887 {
8888 	u32 tmp, buffer_alloc, i;
8889 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8890 	/*
8891 	 * Line Buffer Setup
8892 	 * There are 6 line buffers, one for each display controllers.
8893 	 * There are 3 partitions per LB. Select the number of partitions
8894 	 * to enable based on the display width.  For display widths larger
8895 	 * than 4096, you need use to use 2 display controllers and combine
8896 	 * them using the stereo blender.
8897 	 */
8898 	if (radeon_crtc->base.enabled && mode) {
8899 		if (mode->crtc_hdisplay < 1920) {
8900 			tmp = 1;
8901 			buffer_alloc = 2;
8902 		} else if (mode->crtc_hdisplay < 2560) {
8903 			tmp = 2;
8904 			buffer_alloc = 2;
8905 		} else if (mode->crtc_hdisplay < 4096) {
8906 			tmp = 0;
8907 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8908 		} else {
8909 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8910 			tmp = 0;
8911 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8912 		}
8913 	} else {
8914 		tmp = 1;
8915 		buffer_alloc = 0;
8916 	}
8917 
8918 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8919 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8920 
8921 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8922 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8923 	for (i = 0; i < rdev->usec_timeout; i++) {
8924 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8925 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8926 			break;
8927 		udelay(1);
8928 	}
8929 
8930 	if (radeon_crtc->base.enabled && mode) {
8931 		switch (tmp) {
8932 		case 0:
8933 		default:
8934 			return 4096 * 2;
8935 		case 1:
8936 			return 1920 * 2;
8937 		case 2:
8938 			return 2560 * 2;
8939 		}
8940 	}
8941 
8942 	/* controller not enabled, so no lb used */
8943 	return 0;
8944 }
8945 
8946 /**
8947  * cik_get_number_of_dram_channels - get the number of dram channels
8948  *
8949  * @rdev: radeon_device pointer
8950  *
8951  * Look up the number of video ram channels (CIK).
8952  * Used for display watermark bandwidth calculations
8953  * Returns the number of dram channels
8954  */
8955 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8956 {
8957 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8958 
8959 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8960 	case 0:
8961 	default:
8962 		return 1;
8963 	case 1:
8964 		return 2;
8965 	case 2:
8966 		return 4;
8967 	case 3:
8968 		return 8;
8969 	case 4:
8970 		return 3;
8971 	case 5:
8972 		return 6;
8973 	case 6:
8974 		return 10;
8975 	case 7:
8976 		return 12;
8977 	case 8:
8978 		return 16;
8979 	}
8980 }
8981 
/*
 * Input parameters for the DCE8 display watermark calculations.
 * One instance describes a single crtc/mode at one set of clocks;
 * dce8_program_watermarks() fills one for the high and one for the
 * low clock case.
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8997 
8998 /**
8999  * dce8_dram_bandwidth - get the dram bandwidth
9000  *
9001  * @wm: watermark calculation data
9002  *
9003  * Calculate the raw dram bandwidth (CIK).
9004  * Used for display watermark bandwidth calculations
9005  * Returns the dram bandwidth in MBytes/s
9006  */
9007 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9008 {
9009 	/* Calculate raw DRAM Bandwidth */
9010 	fixed20_12 dram_efficiency; /* 0.7 */
9011 	fixed20_12 yclk, dram_channels, bandwidth;
9012 	fixed20_12 a;
9013 
9014 	a.full = dfixed_const(1000);
9015 	yclk.full = dfixed_const(wm->yclk);
9016 	yclk.full = dfixed_div(yclk, a);
9017 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9018 	a.full = dfixed_const(10);
9019 	dram_efficiency.full = dfixed_const(7);
9020 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9021 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9022 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9023 
9024 	return dfixed_trunc(bandwidth);
9025 }
9026 
9027 /**
9028  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9029  *
9030  * @wm: watermark calculation data
9031  *
9032  * Calculate the dram bandwidth used for display (CIK).
9033  * Used for display watermark bandwidth calculations
9034  * Returns the dram bandwidth for display in MBytes/s
9035  */
9036 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9037 {
9038 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9039 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9040 	fixed20_12 yclk, dram_channels, bandwidth;
9041 	fixed20_12 a;
9042 
9043 	a.full = dfixed_const(1000);
9044 	yclk.full = dfixed_const(wm->yclk);
9045 	yclk.full = dfixed_div(yclk, a);
9046 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9047 	a.full = dfixed_const(10);
9048 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9049 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9050 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9051 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9052 
9053 	return dfixed_trunc(bandwidth);
9054 }
9055 
9056 /**
9057  * dce8_data_return_bandwidth - get the data return bandwidth
9058  *
9059  * @wm: watermark calculation data
9060  *
9061  * Calculate the data return bandwidth used for display (CIK).
9062  * Used for display watermark bandwidth calculations
9063  * Returns the data return bandwidth in MBytes/s
9064  */
9065 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9066 {
9067 	/* Calculate the display Data return Bandwidth */
9068 	fixed20_12 return_efficiency; /* 0.8 */
9069 	fixed20_12 sclk, bandwidth;
9070 	fixed20_12 a;
9071 
9072 	a.full = dfixed_const(1000);
9073 	sclk.full = dfixed_const(wm->sclk);
9074 	sclk.full = dfixed_div(sclk, a);
9075 	a.full = dfixed_const(10);
9076 	return_efficiency.full = dfixed_const(8);
9077 	return_efficiency.full = dfixed_div(return_efficiency, a);
9078 	a.full = dfixed_const(32);
9079 	bandwidth.full = dfixed_mul(a, sclk);
9080 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9081 
9082 	return dfixed_trunc(bandwidth);
9083 }
9084 
9085 /**
9086  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9087  *
9088  * @wm: watermark calculation data
9089  *
9090  * Calculate the dmif bandwidth used for display (CIK).
9091  * Used for display watermark bandwidth calculations
9092  * Returns the dmif bandwidth in MBytes/s
9093  */
9094 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9095 {
9096 	/* Calculate the DMIF Request Bandwidth */
9097 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9098 	fixed20_12 disp_clk, bandwidth;
9099 	fixed20_12 a, b;
9100 
9101 	a.full = dfixed_const(1000);
9102 	disp_clk.full = dfixed_const(wm->disp_clk);
9103 	disp_clk.full = dfixed_div(disp_clk, a);
9104 	a.full = dfixed_const(32);
9105 	b.full = dfixed_mul(a, disp_clk);
9106 
9107 	a.full = dfixed_const(10);
9108 	disp_clk_request_efficiency.full = dfixed_const(8);
9109 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9110 
9111 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9112 
9113 	return dfixed_trunc(bandwidth);
9114 }
9115 
9116 /**
9117  * dce8_available_bandwidth - get the min available bandwidth
9118  *
9119  * @wm: watermark calculation data
9120  *
9121  * Calculate the min available bandwidth used for display (CIK).
9122  * Used for display watermark bandwidth calculations
9123  * Returns the min available bandwidth in MBytes/s
9124  */
9125 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9126 {
9127 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9128 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9129 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9130 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9131 
9132 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9133 }
9134 
9135 /**
9136  * dce8_average_bandwidth - get the average available bandwidth
9137  *
9138  * @wm: watermark calculation data
9139  *
9140  * Calculate the average available bandwidth used for display (CIK).
9141  * Used for display watermark bandwidth calculations
9142  * Returns the average available bandwidth in MBytes/s
9143  */
9144 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9145 {
9146 	/* Calculate the display mode Average Bandwidth
9147 	 * DisplayMode should contain the source and destination dimensions,
9148 	 * timing, etc.
9149 	 */
9150 	fixed20_12 bpp;
9151 	fixed20_12 line_time;
9152 	fixed20_12 src_width;
9153 	fixed20_12 bandwidth;
9154 	fixed20_12 a;
9155 
9156 	a.full = dfixed_const(1000);
9157 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9158 	line_time.full = dfixed_div(line_time, a);
9159 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9160 	src_width.full = dfixed_const(wm->src_width);
9161 	bandwidth.full = dfixed_mul(src_width, bpp);
9162 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9163 	bandwidth.full = dfixed_div(bandwidth, line_time);
9164 
9165 	return dfixed_trunc(bandwidth);
9166 }
9167 
9168 /**
9169  * dce8_latency_watermark - get the latency watermark
9170  *
9171  * @wm: watermark calculation data
9172  *
9173  * Calculate the latency watermark (CIK).
9174  * Used for display watermark bandwidth calculations
9175  * Returns the latency watermark in ns
9176  */
9177 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9178 {
9179 	/* First calculate the latency in ns */
9180 	u32 mc_latency = 2000; /* 2000 ns. */
9181 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9182 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9183 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9184 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9185 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9186 		(wm->num_heads * cursor_line_pair_return_time);
9187 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9188 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9189 	u32 tmp, dmif_size = 12288;
9190 	fixed20_12 a, b, c;
9191 
9192 	if (wm->num_heads == 0)
9193 		return 0;
9194 
9195 	a.full = dfixed_const(2);
9196 	b.full = dfixed_const(1);
9197 	if ((wm->vsc.full > a.full) ||
9198 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9199 	    (wm->vtaps >= 5) ||
9200 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9201 		max_src_lines_per_dst_line = 4;
9202 	else
9203 		max_src_lines_per_dst_line = 2;
9204 
9205 	a.full = dfixed_const(available_bandwidth);
9206 	b.full = dfixed_const(wm->num_heads);
9207 	a.full = dfixed_div(a, b);
9208 
9209 	b.full = dfixed_const(mc_latency + 512);
9210 	c.full = dfixed_const(wm->disp_clk);
9211 	b.full = dfixed_div(b, c);
9212 
9213 	c.full = dfixed_const(dmif_size);
9214 	b.full = dfixed_div(c, b);
9215 
9216 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9217 
9218 	b.full = dfixed_const(1000);
9219 	c.full = dfixed_const(wm->disp_clk);
9220 	b.full = dfixed_div(c, b);
9221 	c.full = dfixed_const(wm->bytes_per_pixel);
9222 	b.full = dfixed_mul(b, c);
9223 
9224 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9225 
9226 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9227 	b.full = dfixed_const(1000);
9228 	c.full = dfixed_const(lb_fill_bw);
9229 	b.full = dfixed_div(c, b);
9230 	a.full = dfixed_div(a, b);
9231 	line_fill_time = dfixed_trunc(a);
9232 
9233 	if (line_fill_time < wm->active_time)
9234 		return latency;
9235 	else
9236 		return latency + (line_fill_time - wm->active_time);
9237 
9238 }
9239 
9240 /**
9241  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9242  * average and available dram bandwidth
9243  *
9244  * @wm: watermark calculation data
9245  *
9246  * Check if the display average bandwidth fits in the display
9247  * dram bandwidth (CIK).
9248  * Used for display watermark bandwidth calculations
9249  * Returns true if the display fits, false if not.
9250  */
9251 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9252 {
9253 	if (dce8_average_bandwidth(wm) <=
9254 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9255 		return true;
9256 	else
9257 		return false;
9258 }
9259 
9260 /**
9261  * dce8_average_bandwidth_vs_available_bandwidth - check
9262  * average and available bandwidth
9263  *
9264  * @wm: watermark calculation data
9265  *
9266  * Check if the display average bandwidth fits in the display
9267  * available bandwidth (CIK).
9268  * Used for display watermark bandwidth calculations
9269  * Returns true if the display fits, false if not.
9270  */
9271 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9272 {
9273 	if (dce8_average_bandwidth(wm) <=
9274 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9275 		return true;
9276 	else
9277 		return false;
9278 }
9279 
9280 /**
9281  * dce8_check_latency_hiding - check latency hiding
9282  *
9283  * @wm: watermark calculation data
9284  *
9285  * Check latency hiding (CIK).
9286  * Used for display watermark bandwidth calculations
9287  * Returns true if the display fits, false if not.
9288  */
9289 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9290 {
9291 	u32 lb_partitions = wm->lb_size / wm->src_width;
9292 	u32 line_time = wm->active_time + wm->blank_time;
9293 	u32 latency_tolerant_lines;
9294 	u32 latency_hiding;
9295 	fixed20_12 a;
9296 
9297 	a.full = dfixed_const(1);
9298 	if (wm->vsc.full > a.full)
9299 		latency_tolerant_lines = 1;
9300 	else {
9301 		if (lb_partitions <= (wm->vtaps + 1))
9302 			latency_tolerant_lines = 1;
9303 		else
9304 			latency_tolerant_lines = 2;
9305 	}
9306 
9307 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9308 
9309 	if (dce8_latency_watermark(wm) <= latency_hiding)
9310 		return true;
9311 	else
9312 		return false;
9313 }
9314 
9315 /**
9316  * dce8_program_watermarks - program display watermarks
9317  *
9318  * @rdev: radeon_device pointer
9319  * @radeon_crtc: the selected display controller
9320  * @lb_size: line buffer size
9321  * @num_heads: number of display controllers in use
9322  *
9323  * Calculate and program the display watermarks for the
9324  * selected display controller (CIK).
9325  */
9326 static void dce8_program_watermarks(struct radeon_device *rdev,
9327 				    struct radeon_crtc *radeon_crtc,
9328 				    u32 lb_size, u32 num_heads)
9329 {
9330 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9331 	struct dce8_wm_params wm_low, wm_high;
9332 	u32 pixel_period;
9333 	u32 line_time = 0;
9334 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9335 	u32 tmp, wm_mask;
9336 
9337 	if (radeon_crtc->base.enabled && num_heads && mode) {
9338 		pixel_period = 1000000 / (u32)mode->clock;
9339 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9340 
9341 		/* watermark for high clocks */
9342 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9343 		    rdev->pm.dpm_enabled) {
9344 			wm_high.yclk =
9345 				radeon_dpm_get_mclk(rdev, false) * 10;
9346 			wm_high.sclk =
9347 				radeon_dpm_get_sclk(rdev, false) * 10;
9348 		} else {
9349 			wm_high.yclk = rdev->pm.current_mclk * 10;
9350 			wm_high.sclk = rdev->pm.current_sclk * 10;
9351 		}
9352 
9353 		wm_high.disp_clk = mode->clock;
9354 		wm_high.src_width = mode->crtc_hdisplay;
9355 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9356 		wm_high.blank_time = line_time - wm_high.active_time;
9357 		wm_high.interlaced = false;
9358 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9359 			wm_high.interlaced = true;
9360 		wm_high.vsc = radeon_crtc->vsc;
9361 		wm_high.vtaps = 1;
9362 		if (radeon_crtc->rmx_type != RMX_OFF)
9363 			wm_high.vtaps = 2;
9364 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9365 		wm_high.lb_size = lb_size;
9366 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9367 		wm_high.num_heads = num_heads;
9368 
9369 		/* set for high clocks */
9370 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9371 
9372 		/* possibly force display priority to high */
9373 		/* should really do this at mode validation time... */
9374 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9375 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9376 		    !dce8_check_latency_hiding(&wm_high) ||
9377 		    (rdev->disp_priority == 2)) {
9378 			DRM_DEBUG_KMS("force priority to high\n");
9379 		}
9380 
9381 		/* watermark for low clocks */
9382 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9383 		    rdev->pm.dpm_enabled) {
9384 			wm_low.yclk =
9385 				radeon_dpm_get_mclk(rdev, true) * 10;
9386 			wm_low.sclk =
9387 				radeon_dpm_get_sclk(rdev, true) * 10;
9388 		} else {
9389 			wm_low.yclk = rdev->pm.current_mclk * 10;
9390 			wm_low.sclk = rdev->pm.current_sclk * 10;
9391 		}
9392 
9393 		wm_low.disp_clk = mode->clock;
9394 		wm_low.src_width = mode->crtc_hdisplay;
9395 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9396 		wm_low.blank_time = line_time - wm_low.active_time;
9397 		wm_low.interlaced = false;
9398 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9399 			wm_low.interlaced = true;
9400 		wm_low.vsc = radeon_crtc->vsc;
9401 		wm_low.vtaps = 1;
9402 		if (radeon_crtc->rmx_type != RMX_OFF)
9403 			wm_low.vtaps = 2;
9404 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9405 		wm_low.lb_size = lb_size;
9406 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9407 		wm_low.num_heads = num_heads;
9408 
9409 		/* set for low clocks */
9410 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9411 
9412 		/* possibly force display priority to high */
9413 		/* should really do this at mode validation time... */
9414 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9415 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9416 		    !dce8_check_latency_hiding(&wm_low) ||
9417 		    (rdev->disp_priority == 2)) {
9418 			DRM_DEBUG_KMS("force priority to high\n");
9419 		}
9420 	}
9421 
9422 	/* select wm A */
9423 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9424 	tmp = wm_mask;
9425 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9426 	tmp |= LATENCY_WATERMARK_MASK(1);
9427 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9428 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9429 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9430 		LATENCY_HIGH_WATERMARK(line_time)));
9431 	/* select wm B */
9432 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9433 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9434 	tmp |= LATENCY_WATERMARK_MASK(2);
9435 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9436 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9437 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9438 		LATENCY_HIGH_WATERMARK(line_time)));
9439 	/* restore original selection */
9440 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9441 
9442 	/* save values for DPM */
9443 	radeon_crtc->line_time = line_time;
9444 	radeon_crtc->wm_high = latency_watermark_a;
9445 	radeon_crtc->wm_low = latency_watermark_b;
9446 }
9447 
9448 /**
9449  * dce8_bandwidth_update - program display watermarks
9450  *
9451  * @rdev: radeon_device pointer
9452  *
9453  * Calculate and program the display watermarks and line
9454  * buffer allocation (CIK).
9455  */
9456 void dce8_bandwidth_update(struct radeon_device *rdev)
9457 {
9458 	struct drm_display_mode *mode = NULL;
9459 	u32 num_heads = 0, lb_size;
9460 	int i;
9461 
9462 	if (!rdev->mode_info.mode_config_initialized)
9463 		return;
9464 
9465 	radeon_update_display_priority(rdev);
9466 
9467 	for (i = 0; i < rdev->num_crtc; i++) {
9468 		if (rdev->mode_info.crtcs[i]->base.enabled)
9469 			num_heads++;
9470 	}
9471 	for (i = 0; i < rdev->num_crtc; i++) {
9472 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9473 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9474 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9475 	}
9476 }
9477 
9478 /**
9479  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9480  *
9481  * @rdev: radeon_device pointer
9482  *
9483  * Fetches a GPU clock counter snapshot (SI).
9484  * Returns the 64 bit clock counter snapshot.
9485  */
9486 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9487 {
9488 	uint64_t clock;
9489 
9490 	spin_lock(&rdev->gpu_clock_mutex);
9491 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9492 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9493 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9494 	spin_unlock(&rdev->gpu_clock_mutex);
9495 	return clock;
9496 }
9497 
9498 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9499                               u32 cntl_reg, u32 status_reg)
9500 {
9501 	int r, i;
9502 	struct atom_clock_dividers dividers;
9503 	uint32_t tmp;
9504 
9505 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9506 					   clock, false, &dividers);
9507 	if (r)
9508 		return r;
9509 
9510 	tmp = RREG32_SMC(cntl_reg);
9511 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9512 	tmp |= dividers.post_divider;
9513 	WREG32_SMC(cntl_reg, tmp);
9514 
9515 	for (i = 0; i < 100; i++) {
9516 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9517 			break;
9518 		mdelay(10);
9519 	}
9520 	if (i == 100)
9521 		return -ETIMEDOUT;
9522 
9523 	return 0;
9524 }
9525 
9526 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9527 {
9528 	int r = 0;
9529 
9530 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9531 	if (r)
9532 		return r;
9533 
9534 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9535 	return r;
9536 }
9537 
9538 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9539 {
9540 	int r, i;
9541 	struct atom_clock_dividers dividers;
9542 	u32 tmp;
9543 
9544 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9545 					   ecclk, false, &dividers);
9546 	if (r)
9547 		return r;
9548 
9549 	for (i = 0; i < 100; i++) {
9550 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9551 			break;
9552 		mdelay(10);
9553 	}
9554 	if (i == 100)
9555 		return -ETIMEDOUT;
9556 
9557 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9558 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9559 	tmp |= dividers.post_divider;
9560 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9561 
9562 	for (i = 0; i < 100; i++) {
9563 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9564 			break;
9565 		mdelay(10);
9566 	}
9567 	if (i == 100)
9568 		return -ETIMEDOUT;
9569 
9570 	return 0;
9571 }
9572 
9573 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9574 {
9575 	struct pci_dev *root = rdev->pdev->bus->self;
9576 	int bridge_pos, gpu_pos;
9577 	u32 speed_cntl, mask, current_data_rate;
9578 	int ret, i;
9579 	u16 tmp16;
9580 
9581 	if (radeon_pcie_gen2 == 0)
9582 		return;
9583 
9584 	if (rdev->flags & RADEON_IS_IGP)
9585 		return;
9586 
9587 	if (!(rdev->flags & RADEON_IS_PCIE))
9588 		return;
9589 
9590 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9591 	if (ret != 0)
9592 		return;
9593 
9594 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9595 		return;
9596 
9597 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9598 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9599 		LC_CURRENT_DATA_RATE_SHIFT;
9600 	if (mask & DRM_PCIE_SPEED_80) {
9601 		if (current_data_rate == 2) {
9602 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9603 			return;
9604 		}
9605 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9606 	} else if (mask & DRM_PCIE_SPEED_50) {
9607 		if (current_data_rate == 1) {
9608 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9609 			return;
9610 		}
9611 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9612 	}
9613 
9614 	bridge_pos = pci_get_pciecap_ptr(root->dev.bsddev);
9615 	if (!bridge_pos)
9616 		return;
9617 
9618 	gpu_pos = pci_get_pciecap_ptr(rdev->pdev->dev.bsddev);
9619 	if (!gpu_pos)
9620 		return;
9621 
9622 	if (mask & DRM_PCIE_SPEED_80) {
9623 		/* re-try equalization if gen3 is not already enabled */
9624 		if (current_data_rate != 2) {
9625 			u16 bridge_cfg, gpu_cfg;
9626 			u16 bridge_cfg2, gpu_cfg2;
9627 			u32 max_lw, current_lw, tmp;
9628 
9629 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9630 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9631 
9632 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9633 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9634 
9635 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9636 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9637 
9638 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9639 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9640 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9641 
9642 			if (current_lw < max_lw) {
9643 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9644 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9645 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9646 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9647 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9648 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9649 				}
9650 			}
9651 
9652 			for (i = 0; i < 10; i++) {
9653 				/* check status */
9654 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9655 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9656 					break;
9657 
9658 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9659 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9660 
9661 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9662 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9663 
9664 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9665 				tmp |= LC_SET_QUIESCE;
9666 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9667 
9668 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9669 				tmp |= LC_REDO_EQ;
9670 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9671 
9672 				mdelay(100);
9673 
9674 				/* linkctl */
9675 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9676 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9677 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9678 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9679 
9680 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9681 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9682 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9683 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9684 
9685 				/* linkctl2 */
9686 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9687 				tmp16 &= ~((1 << 4) | (7 << 9));
9688 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9689 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9690 
9691 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9692 				tmp16 &= ~((1 << 4) | (7 << 9));
9693 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9694 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9695 
9696 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9697 				tmp &= ~LC_SET_QUIESCE;
9698 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9699 			}
9700 		}
9701 	}
9702 
9703 	/* set the link speed */
9704 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9705 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9706 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9707 
9708 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9709 	tmp16 &= ~0xf;
9710 	if (mask & DRM_PCIE_SPEED_80)
9711 		tmp16 |= 3; /* gen3 */
9712 	else if (mask & DRM_PCIE_SPEED_50)
9713 		tmp16 |= 2; /* gen2 */
9714 	else
9715 		tmp16 |= 1; /* gen1 */
9716 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9717 
9718 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9719 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9720 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9721 
9722 	for (i = 0; i < rdev->usec_timeout; i++) {
9723 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9724 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9725 			break;
9726 		udelay(1);
9727 	}
9728 }
9729 
9730 static void cik_program_aspm(struct radeon_device *rdev)
9731 {
9732 	u32 data, orig;
9733 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9734 	bool disable_clkreq = false;
9735 
9736 	if (radeon_aspm == 0)
9737 		return;
9738 
9739 	/* XXX double check IGPs */
9740 	if (rdev->flags & RADEON_IS_IGP)
9741 		return;
9742 
9743 	if (!(rdev->flags & RADEON_IS_PCIE))
9744 		return;
9745 
9746 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9747 	data &= ~LC_XMIT_N_FTS_MASK;
9748 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9749 	if (orig != data)
9750 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9751 
9752 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9753 	data |= LC_GO_TO_RECOVERY;
9754 	if (orig != data)
9755 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9756 
9757 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9758 	data |= P_IGNORE_EDB_ERR;
9759 	if (orig != data)
9760 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9761 
9762 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9763 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9764 	data |= LC_PMI_TO_L1_DIS;
9765 	if (!disable_l0s)
9766 		data |= LC_L0S_INACTIVITY(7);
9767 
9768 	if (!disable_l1) {
9769 		data |= LC_L1_INACTIVITY(7);
9770 		data &= ~LC_PMI_TO_L1_DIS;
9771 		if (orig != data)
9772 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9773 
9774 		if (!disable_plloff_in_l1) {
9775 			bool clk_req_support;
9776 
9777 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9778 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9779 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9780 			if (orig != data)
9781 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9782 
9783 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9784 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9785 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9786 			if (orig != data)
9787 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9788 
9789 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9790 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9791 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9792 			if (orig != data)
9793 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9794 
9795 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9796 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9797 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9798 			if (orig != data)
9799 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9800 
9801 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9802 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9803 			data |= LC_DYN_LANES_PWR_STATE(3);
9804 			if (orig != data)
9805 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9806 
9807 			if (!disable_clkreq) {
9808 #ifdef zMN_TODO
9809 				struct pci_dev *root = rdev->pdev->bus->self;
9810 				u32 lnkcap;
9811 
9812 				clk_req_support = false;
9813 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9814 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9815 					clk_req_support = true;
9816 #else
9817 				clk_req_support = false;
9818 #endif
9819 			} else {
9820 				clk_req_support = false;
9821 			}
9822 
9823 			if (clk_req_support) {
9824 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9825 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9826 				if (orig != data)
9827 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9828 
9829 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9830 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9831 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9832 				if (orig != data)
9833 					WREG32_SMC(THM_CLK_CNTL, data);
9834 
9835 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9836 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9837 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9838 				if (orig != data)
9839 					WREG32_SMC(MISC_CLK_CTRL, data);
9840 
9841 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9842 				data &= ~BCLK_AS_XCLK;
9843 				if (orig != data)
9844 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9845 
9846 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9847 				data &= ~FORCE_BIF_REFCLK_EN;
9848 				if (orig != data)
9849 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9850 
9851 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9852 				data &= ~MPLL_CLKOUT_SEL_MASK;
9853 				data |= MPLL_CLKOUT_SEL(4);
9854 				if (orig != data)
9855 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9856 			}
9857 		}
9858 	} else {
9859 		if (orig != data)
9860 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9861 	}
9862 
9863 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9864 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9865 	if (orig != data)
9866 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9867 
9868 	if (!disable_l0s) {
9869 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9870 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9871 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9872 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9873 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9874 				data &= ~LC_L0S_INACTIVITY_MASK;
9875 				if (orig != data)
9876 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9877 			}
9878 		}
9879 	}
9880 }
9881