xref: /dragonfly/sys/dev/drm/radeon/cik.c (revision 0d27ae55)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include "cikd.h"
30 #include "atom.h"
31 #include "cik_blit_shaders.h"
32 #include "radeon_ucode.h"
33 #include "clearstate_ci.h"
34 
35 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
36 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 
45 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
46 MODULE_FIRMWARE("radeon/bonaire_me.bin");
47 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
48 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
49 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
50 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
51 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
52 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
53 
54 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
55 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
56 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
57 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
63 
64 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
65 MODULE_FIRMWARE("radeon/hawaii_me.bin");
66 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
67 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
68 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
69 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
70 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
71 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
72 
73 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
74 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
75 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
76 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
79 
80 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
81 MODULE_FIRMWARE("radeon/kaveri_me.bin");
82 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
83 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
84 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
85 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
86 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
87 
88 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
89 MODULE_FIRMWARE("radeon/KABINI_me.bin");
90 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
91 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
92 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
93 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
94 
95 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
96 MODULE_FIRMWARE("radeon/kabini_me.bin");
97 MODULE_FIRMWARE("radeon/kabini_ce.bin");
98 MODULE_FIRMWARE("radeon/kabini_mec.bin");
99 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
100 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
101 
102 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
103 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
104 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
105 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
108 
109 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
110 MODULE_FIRMWARE("radeon/mullins_me.bin");
111 MODULE_FIRMWARE("radeon/mullins_ce.bin");
112 MODULE_FIRMWARE("radeon/mullins_mec.bin");
113 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
114 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
115 
116 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
117 static void cik_rlc_stop(struct radeon_device *rdev);
118 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
119 static void cik_program_aspm(struct radeon_device *rdev);
120 static void cik_init_pg(struct radeon_device *rdev);
121 static void cik_init_cg(struct radeon_device *rdev);
122 static void cik_fini_pg(struct radeon_device *rdev);
123 static void cik_fini_cg(struct radeon_device *rdev);
124 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
125 					  bool enable);
126 
127 /* get temperature in millidegrees */
128 int ci_get_temp(struct radeon_device *rdev)
129 {
130 	u32 temp;
131 	int actual_temp = 0;
132 
133 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
134 		CTF_TEMP_SHIFT;
135 
136 	if (temp & 0x200)
137 		actual_temp = 255;
138 	else
139 		actual_temp = temp & 0x1ff;
140 
141 	actual_temp = actual_temp * 1000;
142 
143 	return actual_temp;
144 }
145 
146 /* get temperature in millidegrees */
147 int kv_get_temp(struct radeon_device *rdev)
148 {
149 	u32 temp;
150 	int actual_temp = 0;
151 
152 	temp = RREG32_SMC(0xC0300E0C);
153 
154 	if (temp)
155 		actual_temp = (temp / 8) - 49;
156 	else
157 		actual_temp = 0;
158 
159 	actual_temp = actual_temp * 1000;
160 
161 	return actual_temp;
162 }
163 
164 /*
165  * Indirect registers accessor
166  */
/**
 * cik_pciep_rreg - read an indirect PCIE port register
 * @rdev: radeon device handle
 * @reg: PCIE port register offset to read
 *
 * Reads @reg through the PCIE_INDEX/PCIE_DATA indirect register pair,
 * serialized against concurrent accessors by pciep_idx_lock.
 * Returns the 32-bit register value.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	spin_lock(&rdev->pciep_idx_lock);
	WREG32(PCIE_INDEX, reg);
	/* readback, presumably to post the index write before the data
	 * access - standard indexed-register idiom */
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock(&rdev->pciep_idx_lock);
	return r;
}
178 
/**
 * cik_pciep_wreg - write an indirect PCIE port register
 * @rdev: radeon device handle
 * @reg: PCIE port register offset to write
 * @v: 32-bit value to write
 *
 * Writes @v to @reg through the PCIE_INDEX/PCIE_DATA indirect register
 * pair, serialized against concurrent accessors by pciep_idx_lock.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	spin_lock(&rdev->pciep_idx_lock);
	WREG32(PCIE_INDEX, reg);
	/* readback, presumably to post the index write */
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	/* readback, presumably to post the data write */
	(void)RREG32(PCIE_DATA);
	spin_unlock(&rdev->pciep_idx_lock);
}
188 
/*
 * RLC save/restore register list for Spectre (presumably the Kaveri GFX
 * variant - the KAVERI firmware loaded above suggests so; verify).
 *
 * NOTE(review): layout appears to be pairs of
 *   { (instance/broadcast select << 16) | (register dword offset), value },
 * with bare small literals (0x3, 0x5) acting as section markers whose
 * meaning is defined by the RLC setup code that consumes this table
 * elsewhere in the file - confirm against that consumer.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3, /* NOTE(review): section marker consumed by RLC setup; verify */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5, /* NOTE(review): section marker; following entries carry no value words */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
635 
/*
 * RLC save/restore register list for Kalindi (presumably the Kabini /
 * Mullins APU GFX variant - the KABINI/MULLINS firmware loaded above
 * suggests so; verify).
 *
 * NOTE(review): same apparent layout as the Spectre list above: pairs of
 *   { (instance/broadcast select << 16) | (register dword offset), value },
 * with bare literals (0x3, 0x5) as section markers interpreted by the
 * RLC setup code that consumes this table - confirm against that consumer.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3, /* NOTE(review): section marker consumed by RLC setup; verify */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5, /* NOTE(review): section marker; following entries carry no value words */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
960 
/*
 * Bonaire "golden" SPM register settings.
 * Triples of { register offset, mask, value } - presumably applied by a
 * radeon_program_register_sequence()-style helper (read-modify-write
 * under the mask); NOTE(review): confirm mask semantics at the consumer.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
965 
/*
 * Bonaire "golden" common register settings.
 * Triples of { register offset, mask, value } - presumably consumed by a
 * register-sequence programmer; NOTE(review): confirm at the consumer.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
973 
/*
 * Bonaire "golden" register settings.
 * Triples of { register offset, mask, value } - presumably consumed by a
 * register-sequence programmer; NOTE(review): confirm at the consumer.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1018 
/*
 * Bonaire medium-grain / coarse-grain clock-gating init values.
 * Triples of { register offset, mask, value } - presumably consumed by a
 * register-sequence programmer; NOTE(review): confirm at the consumer.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1104 
/* SPM golden setting for Spectre (used for CHIP_KAVERI);
 * entries are {register offset, mask, value} triplets consumed by
 * radeon_program_register_sequence(). */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1109 
/* Common golden settings for Spectre (CHIP_KAVERI): {offset, mask, value} */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1117 
/* Golden register settings for Spectre (CHIP_KAVERI).
 * Three-word entries: {register offset, mask, value}; applied via
 * radeon_program_register_sequence() from cik_init_golden_registers(). */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1146 
/* MGCG/CGCG (clock gating, per the table name) init sequence for Spectre
 * (CHIP_KAVERI); {offset, mask, value} triplets, programmed first in
 * cik_init_golden_registers(). */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1237 
/* SPM golden setting for Kalindi (used for CHIP_KABINI and CHIP_MULLINS);
 * {offset, mask, value} triplet. */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1242 
/* Common golden settings for Kalindi (CHIP_KABINI/CHIP_MULLINS):
 * {offset, mask, value} triplets. */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1250 
/* Golden register settings for Kalindi (CHIP_KABINI):
 * {offset, mask, value} triplets applied by
 * radeon_program_register_sequence(). */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1284 
/* MGCG/CGCG (clock gating) init sequence for Kalindi, shared by
 * CHIP_KABINI and CHIP_MULLINS; {offset, mask, value} triplets. */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1343 
/* SPM golden setting for CHIP_HAWAII; {offset, mask, value} triplet */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1348 
/* Common golden settings for CHIP_HAWAII: {offset, mask, value} triplets */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1357 
/* Golden register settings for CHIP_HAWAII:
 * {offset, mask, value} triplets applied by
 * radeon_program_register_sequence(). */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1397 
/* MGCG/CGCG (clock gating) init sequence for CHIP_HAWAII;
 * {offset, mask, value} triplets, programmed first in
 * cik_init_golden_registers(). */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1508 
/* Golden register settings for Godavari (used for CHIP_MULLINS):
 * {offset, mask, value} triplets applied by
 * radeon_program_register_sequence(). */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400, /* NOTE(review): 0x98302 looks like a
					  * suspicious offset (neighbors use
					  * 4-byte-aligned 0x98xx values, e.g.
					  * 0x9834 in kalindi's table) but it
					  * matches the upstream table -- verify
					  * before changing */
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1544 
1545 
1546 static void cik_init_golden_registers(struct radeon_device *rdev)
1547 {
1548 	switch (rdev->family) {
1549 	case CHIP_BONAIRE:
1550 		radeon_program_register_sequence(rdev,
1551 						 bonaire_mgcg_cgcg_init,
1552 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1553 		radeon_program_register_sequence(rdev,
1554 						 bonaire_golden_registers,
1555 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1556 		radeon_program_register_sequence(rdev,
1557 						 bonaire_golden_common_registers,
1558 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1559 		radeon_program_register_sequence(rdev,
1560 						 bonaire_golden_spm_registers,
1561 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1562 		break;
1563 	case CHIP_KABINI:
1564 		radeon_program_register_sequence(rdev,
1565 						 kalindi_mgcg_cgcg_init,
1566 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1567 		radeon_program_register_sequence(rdev,
1568 						 kalindi_golden_registers,
1569 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1570 		radeon_program_register_sequence(rdev,
1571 						 kalindi_golden_common_registers,
1572 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1573 		radeon_program_register_sequence(rdev,
1574 						 kalindi_golden_spm_registers,
1575 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1576 		break;
1577 	case CHIP_MULLINS:
1578 		radeon_program_register_sequence(rdev,
1579 						 kalindi_mgcg_cgcg_init,
1580 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1581 		radeon_program_register_sequence(rdev,
1582 						 godavari_golden_registers,
1583 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1584 		radeon_program_register_sequence(rdev,
1585 						 kalindi_golden_common_registers,
1586 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1587 		radeon_program_register_sequence(rdev,
1588 						 kalindi_golden_spm_registers,
1589 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1590 		break;
1591 	case CHIP_KAVERI:
1592 		radeon_program_register_sequence(rdev,
1593 						 spectre_mgcg_cgcg_init,
1594 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1595 		radeon_program_register_sequence(rdev,
1596 						 spectre_golden_registers,
1597 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1598 		radeon_program_register_sequence(rdev,
1599 						 spectre_golden_common_registers,
1600 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1601 		radeon_program_register_sequence(rdev,
1602 						 spectre_golden_spm_registers,
1603 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1604 		break;
1605 	case CHIP_HAWAII:
1606 		radeon_program_register_sequence(rdev,
1607 						 hawaii_mgcg_cgcg_init,
1608 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1609 		radeon_program_register_sequence(rdev,
1610 						 hawaii_golden_registers,
1611 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1612 		radeon_program_register_sequence(rdev,
1613 						 hawaii_golden_common_registers,
1614 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1615 		radeon_program_register_sequence(rdev,
1616 						 hawaii_golden_spm_registers,
1617 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1618 		break;
1619 	default:
1620 		break;
1621 	}
1622 }
1623 
1624 /**
1625  * cik_get_xclk - get the xclk
1626  *
1627  * @rdev: radeon_device pointer
1628  *
1629  * Returns the reference clock used by the gfx engine
1630  * (CIK).
1631  */
1632 u32 cik_get_xclk(struct radeon_device *rdev)
1633 {
1634         u32 reference_clock = rdev->clock.spll.reference_freq;
1635 
1636 	if (rdev->flags & RADEON_IS_IGP) {
1637 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1638 			return reference_clock / 2;
1639 	} else {
1640 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1641 			return reference_clock / 4;
1642 	}
1643 	return reference_clock;
1644 }
1645 
1646 /**
1647  * cik_mm_rdoorbell - read a doorbell dword
1648  *
1649  * @rdev: radeon_device pointer
1650  * @index: doorbell index
1651  *
1652  * Returns the value in the doorbell aperture at the
1653  * requested doorbell index (CIK).
1654  */
1655 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1656 {
1657 	if (index < rdev->doorbell.num_doorbells) {
1658 		return readl(rdev->doorbell.ptr + index);
1659 	} else {
1660 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1661 		return 0;
1662 	}
1663 }
1664 
1665 /**
1666  * cik_mm_wdoorbell - write a doorbell dword
1667  *
1668  * @rdev: radeon_device pointer
1669  * @index: doorbell index
1670  * @v: value to write
1671  *
1672  * Writes @v to the doorbell aperture at the
1673  * requested doorbell index (CIK).
1674  */
1675 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1676 {
1677 	if (index < rdev->doorbell.num_doorbells) {
1678 		writel(v, rdev->doorbell.ptr + index);
1679 	} else {
1680 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1681 	}
1682 }
1683 
#define BONAIRE_IO_MC_REGS_SIZE 36

/* MC io debug register pairs for Bonaire: {MC_SEQ_IO_DEBUG_INDEX value,
 * MC_SEQ_IO_DEBUG_DATA value}, written by ci_mc_load_microcode() when the
 * legacy (non-unified) MC firmware image is in use. */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1725 
#define HAWAII_IO_MC_REGS_SIZE 22

/* MC io debug register pairs for Hawaii: {MC_SEQ_IO_DEBUG_INDEX value,
 * MC_SEQ_IO_DEBUG_DATA value}, written by ci_mc_load_microcode() when the
 * legacy (non-unified) MC firmware image is in use. */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1753 
1754 
1755 /**
1756  * cik_srbm_select - select specific register instances
1757  *
1758  * @rdev: radeon_device pointer
1759  * @me: selected ME (micro engine)
1760  * @pipe: pipe
1761  * @queue: queue
1762  * @vmid: VMID
1763  *
1764  * Switches the currently active registers instances.  Some
1765  * registers are instanced per VMID, others are instanced per
1766  * me/pipe/queue combination.
1767  */
1768 static void cik_srbm_select(struct radeon_device *rdev,
1769 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1770 {
1771 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1772 			     MEID(me & 0x3) |
1773 			     VMID(vmid & 0xf) |
1774 			     QUEUEID(queue & 0x7));
1775 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1776 }
1777 
1778 /* ucode loading */
1779 /**
1780  * ci_mc_load_microcode - load MC ucode into the hw
1781  *
1782  * @rdev: radeon_device pointer
1783  *
1784  * Load the GDDR MC ucode into the hw (CIK).
1785  * Returns 0 on success, error on failure.
1786  */
1787 int ci_mc_load_microcode(struct radeon_device *rdev)
1788 {
1789 	const __be32 *fw_data = NULL;
1790 	const __le32 *new_fw_data = NULL;
1791 	u32 running, blackout = 0;
1792 	u32 *io_mc_regs = NULL;
1793 	const __le32 *new_io_mc_regs = NULL;
1794 	int i, regs_size, ucode_size;
1795 
1796 	if (!rdev->mc_fw)
1797 		return -EINVAL;
1798 
1799 	if (rdev->new_fw) {
1800 		const struct mc_firmware_header_v1_0 *hdr =
1801 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1802 
1803 		radeon_ucode_print_mc_hdr(&hdr->header);
1804 
1805 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1806 		new_io_mc_regs = (const __le32 *)
1807 			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1808 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1809 		new_fw_data = (const __le32 *)
1810 			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1811 	} else {
1812 		ucode_size = rdev->mc_fw->datasize / 4;
1813 
1814 		switch (rdev->family) {
1815 		case CHIP_BONAIRE:
1816 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1817 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1818 			break;
1819 		case CHIP_HAWAII:
1820 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1821 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1822 			break;
1823 		default:
1824 			return -EINVAL;
1825 		}
1826 		fw_data = (const __be32 *)rdev->mc_fw->data;
1827 	}
1828 
1829 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1830 
1831 	if (running == 0) {
1832 		if (running) {
1833 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1834 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1835 		}
1836 
1837 		/* reset the engine and set to writable */
1838 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1839 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1840 
1841 		/* load mc io regs */
1842 		for (i = 0; i < regs_size; i++) {
1843 			if (rdev->new_fw) {
1844 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1845 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1846 			} else {
1847 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1848 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1849 			}
1850 		}
1851 		/* load the MC ucode */
1852 		for (i = 0; i < ucode_size; i++) {
1853 			if (rdev->new_fw)
1854 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1855 			else
1856 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1857 		}
1858 
1859 		/* put the engine back into the active state */
1860 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1861 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1862 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1863 
1864 		/* wait for training to complete */
1865 		for (i = 0; i < rdev->usec_timeout; i++) {
1866 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1867 				break;
1868 			udelay(1);
1869 		}
1870 		for (i = 0; i < rdev->usec_timeout; i++) {
1871 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1872 				break;
1873 			udelay(1);
1874 		}
1875 
1876 		if (running)
1877 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1878 	}
1879 
1880 	return 0;
1881 }
1882 
1883 /**
1884  * cik_init_microcode - load ucode images from disk
1885  *
1886  * @rdev: radeon_device pointer
1887  *
1888  * Use the firmware interface to load the ucode images into
1889  * the driver (not loaded into hw).
1890  * Returns 0 on success, error on failure.
1891  */
1892 static int cik_init_microcode(struct radeon_device *rdev)
1893 {
1894 	const char *chip_name;
1895 	const char *new_chip_name;
1896 	size_t pfp_req_size, me_req_size, ce_req_size,
1897 		mec_req_size, rlc_req_size, mc_req_size = 0,
1898 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1899 	char fw_name[30];
1900 	int new_fw = 0;
1901 	int err;
1902 	int num_fw;
1903 
1904 	DRM_DEBUG("\n");
1905 
1906 	switch (rdev->family) {
1907 	case CHIP_BONAIRE:
1908 		chip_name = "BONAIRE";
1909 		new_chip_name = "bonaire";
1910 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1911 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1912 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1913 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1914 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1915 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1916 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1917 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1918 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1919 		num_fw = 8;
1920 		break;
1921 	case CHIP_HAWAII:
1922 		chip_name = "HAWAII";
1923 		new_chip_name = "hawaii";
1924 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1925 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1926 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1927 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1928 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1929 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1930 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1931 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1932 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1933 		num_fw = 8;
1934 		break;
1935 	case CHIP_KAVERI:
1936 		chip_name = "KAVERI";
1937 		new_chip_name = "kaveri";
1938 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1939 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1940 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1941 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1942 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1943 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1944 		num_fw = 7;
1945 		break;
1946 	case CHIP_KABINI:
1947 		chip_name = "KABINI";
1948 		new_chip_name = "kabini";
1949 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1950 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1951 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1952 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1953 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1954 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1955 		num_fw = 6;
1956 		break;
1957 	case CHIP_MULLINS:
1958 		chip_name = "MULLINS";
1959 		new_chip_name = "mullins";
1960 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1961 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1962 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1963 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1964 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1965 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1966 		num_fw = 6;
1967 		break;
1968 	default: BUG();
1969 	}
1970 
1971 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1972 
1973 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
1974 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1975 	if (err) {
1976 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1977 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1978 		if (err)
1979 			goto out;
1980 		if (rdev->pfp_fw->datasize != pfp_req_size) {
1981 			printk(KERN_ERR
1982 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1983 			       rdev->pfp_fw->datasize, fw_name);
1984 			err = -EINVAL;
1985 			goto out;
1986 		}
1987 	} else {
1988 		err = radeon_ucode_validate(rdev->pfp_fw);
1989 		if (err) {
1990 			printk(KERN_ERR
1991 			       "cik_fw: validation failed for firmware \"%s\"\n",
1992 			       fw_name);
1993 			goto out;
1994 		} else {
1995 			new_fw++;
1996 		}
1997 	}
1998 
1999 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
2000 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2001 	if (err) {
2002 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
2003 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2004 		if (err)
2005 			goto out;
2006 		if (rdev->me_fw->datasize != me_req_size) {
2007 			printk(KERN_ERR
2008 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2009 			       rdev->me_fw->datasize, fw_name);
2010 			err = -EINVAL;
2011 		}
2012 	} else {
2013 		err = radeon_ucode_validate(rdev->me_fw);
2014 		if (err) {
2015 			printk(KERN_ERR
2016 			       "cik_fw: validation failed for firmware \"%s\"\n",
2017 			       fw_name);
2018 			goto out;
2019 		} else {
2020 			new_fw++;
2021 		}
2022 	}
2023 
2024 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
2025 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2026 	if (err) {
2027 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
2028 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2029 		if (err)
2030 			goto out;
2031 		if (rdev->ce_fw->datasize != ce_req_size) {
2032 			printk(KERN_ERR
2033 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2034 			       rdev->ce_fw->datasize, fw_name);
2035 			err = -EINVAL;
2036 		}
2037 	} else {
2038 		err = radeon_ucode_validate(rdev->ce_fw);
2039 		if (err) {
2040 			printk(KERN_ERR
2041 			       "cik_fw: validation failed for firmware \"%s\"\n",
2042 			       fw_name);
2043 			goto out;
2044 		} else {
2045 			new_fw++;
2046 		}
2047 	}
2048 
2049 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", new_chip_name);
2050 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2051 	if (err) {
2052 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
2053 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2054 		if (err)
2055 			goto out;
2056 		if (rdev->mec_fw->datasize != mec_req_size) {
2057 			printk(KERN_ERR
2058 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2059 			       rdev->mec_fw->datasize, fw_name);
2060 			err = -EINVAL;
2061 		}
2062 	} else {
2063 		err = radeon_ucode_validate(rdev->mec_fw);
2064 		if (err) {
2065 			printk(KERN_ERR
2066 			       "cik_fw: validation failed for firmware \"%s\"\n",
2067 			       fw_name);
2068 			goto out;
2069 		} else {
2070 			new_fw++;
2071 		}
2072 	}
2073 
2074 	if (rdev->family == CHIP_KAVERI) {
2075 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec2", new_chip_name);
2076 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2077 		if (err) {
2078 			goto out;
2079 		} else {
2080 			err = radeon_ucode_validate(rdev->mec2_fw);
2081 			if (err) {
2082 				goto out;
2083 			} else {
2084 				new_fw++;
2085 			}
2086 		}
2087 	}
2088 
2089 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
2090 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2091 	if (err) {
2092 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
2093 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2094 		if (err)
2095 			goto out;
2096 		if (rdev->rlc_fw->datasize != rlc_req_size) {
2097 			printk(KERN_ERR
2098 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2099 			       rdev->rlc_fw->datasize, fw_name);
2100 			err = -EINVAL;
2101 		}
2102 	} else {
2103 		err = radeon_ucode_validate(rdev->rlc_fw);
2104 		if (err) {
2105 			printk(KERN_ERR
2106 			       "cik_fw: validation failed for firmware \"%s\"\n",
2107 			       fw_name);
2108 			goto out;
2109 		} else {
2110 			new_fw++;
2111 		}
2112 	}
2113 
2114 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", new_chip_name);
2115 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2116 	if (err) {
2117 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
2118 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2119 		if (err)
2120 			goto out;
2121 		if (rdev->sdma_fw->datasize != sdma_req_size) {
2122 			printk(KERN_ERR
2123 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2124 			       rdev->sdma_fw->datasize, fw_name);
2125 			err = -EINVAL;
2126 		}
2127 	} else {
2128 		err = radeon_ucode_validate(rdev->sdma_fw);
2129 		if (err) {
2130 			printk(KERN_ERR
2131 			       "cik_fw: validation failed for firmware \"%s\"\n",
2132 			       fw_name);
2133 			goto out;
2134 		} else {
2135 			new_fw++;
2136 		}
2137 	}
2138 
2139 	/* No SMC, MC ucode on APUs */
2140 	if (!(rdev->flags & RADEON_IS_IGP)) {
2141 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
2142 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2143 		if (err) {
2144 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
2145 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2146 			if (err) {
2147 				ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
2148 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2149 				if (err)
2150 					goto out;
2151 			}
2152 			if ((rdev->mc_fw->datasize != mc_req_size) &&
2153 			    (rdev->mc_fw->datasize != mc2_req_size)){
2154 				printk(KERN_ERR
2155 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2156 				       rdev->mc_fw->datasize, fw_name);
2157 				err = -EINVAL;
2158 			}
2159 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
2160 		} else {
2161 			err = radeon_ucode_validate(rdev->mc_fw);
2162 			if (err) {
2163 				printk(KERN_ERR
2164 				       "cik_fw: validation failed for firmware \"%s\"\n",
2165 				       fw_name);
2166 				goto out;
2167 			} else {
2168 				new_fw++;
2169 			}
2170 		}
2171 
2172 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
2173 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2174 		if (err) {
2175 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
2176 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2177 			if (err) {
2178 				printk(KERN_ERR
2179 				       "smc: error loading firmware \"%s\"\n",
2180 				       fw_name);
2181 				release_firmware(rdev->smc_fw);
2182 				rdev->smc_fw = NULL;
2183 				err = 0;
2184 			} else if (rdev->smc_fw->datasize != smc_req_size) {
2185 				printk(KERN_ERR
2186 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2187 				       rdev->smc_fw->datasize, fw_name);
2188 				err = -EINVAL;
2189 			}
2190 		} else {
2191 			err = radeon_ucode_validate(rdev->smc_fw);
2192 			if (err) {
2193 				printk(KERN_ERR
2194 				       "cik_fw: validation failed for firmware \"%s\"\n",
2195 				       fw_name);
2196 				goto out;
2197 			} else {
2198 				new_fw++;
2199 			}
2200 		}
2201 	}
2202 
2203 	if (new_fw == 0) {
2204 		rdev->new_fw = false;
2205 	} else if (new_fw < num_fw) {
2206 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2207 		err = -EINVAL;
2208 	} else {
2209 		rdev->new_fw = true;
2210 	}
2211 
2212 out:
2213 	if (err) {
2214 		if (err != -EINVAL)
2215 			printk(KERN_ERR
2216 			       "cik_cp: Failed to load firmware \"%s\"\n",
2217 			       fw_name);
2218 		release_firmware(rdev->pfp_fw);
2219 		rdev->pfp_fw = NULL;
2220 		release_firmware(rdev->me_fw);
2221 		rdev->me_fw = NULL;
2222 		release_firmware(rdev->ce_fw);
2223 		rdev->ce_fw = NULL;
2224 		release_firmware(rdev->mec_fw);
2225 		rdev->mec_fw = NULL;
2226 		release_firmware(rdev->mec2_fw);
2227 		rdev->mec2_fw = NULL;
2228 		release_firmware(rdev->rlc_fw);
2229 		rdev->rlc_fw = NULL;
2230 		release_firmware(rdev->sdma_fw);
2231 		rdev->sdma_fw = NULL;
2232 		release_firmware(rdev->mc_fw);
2233 		rdev->mc_fw = NULL;
2234 		release_firmware(rdev->smc_fw);
2235 		rdev->smc_fw = NULL;
2236 	}
2237 	return err;
2238 }
2239 
2240 /*
2241  * Core functions
2242  */
2243 /**
2244  * cik_tiling_mode_table_init - initialize the hardware tiling mode table
2245  *
2246  * @rdev: radeon_device pointer
2247  *
2248  * Starting with SI, the tiling setup is done globally via a
2249  * set of 32 tiling modes.  Rather than selecting a full set of
2250  * tiling parameters per surface, as on older asics, we simply select
2251  * an index into the tiling table, and the surface uses the
2252  * parameters stored at that index (CIK).
2253  */
2254 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2255 {
2256 	const u32 num_tile_mode_states = 32;
2257 	const u32 num_secondary_tile_mode_states = 16;
2258 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2259 	u32 num_pipe_configs;
2260 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2261 		rdev->config.cik.max_shader_engines;
2262 
2263 	switch (rdev->config.cik.mem_row_size_in_kb) {
2264 	case 1:
2265 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2266 		break;
2267 	case 2:
2268 	default:
2269 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2270 		break;
2271 	case 4:
2272 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2273 		break;
2274 	}
2275 
2276 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2277 	if (num_pipe_configs > 8)
2278 		num_pipe_configs = 16;
2279 
2280 	if (num_pipe_configs == 16) {
2281 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2282 			switch (reg_offset) {
2283 			case 0:
2284 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2285 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2286 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2287 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2288 				break;
2289 			case 1:
2290 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2292 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2293 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2294 				break;
2295 			case 2:
2296 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2297 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2298 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2299 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2300 				break;
2301 			case 3:
2302 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2303 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2304 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2305 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2306 				break;
2307 			case 4:
2308 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2309 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2310 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311 						 TILE_SPLIT(split_equal_to_row_size));
2312 				break;
2313 			case 5:
2314 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2315 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2317 				break;
2318 			case 6:
2319 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2320 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2321 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2322 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2323 				break;
2324 			case 7:
2325 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2326 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2327 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328 						 TILE_SPLIT(split_equal_to_row_size));
2329 				break;
2330 			case 8:
2331 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2332 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2333 				break;
2334 			case 9:
2335 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2336 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2338 				break;
2339 			case 10:
2340 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2341 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2342 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344 				break;
2345 			case 11:
2346 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2347 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2348 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2349 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2350 				break;
2351 			case 12:
2352 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2353 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2354 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2356 				break;
2357 			case 13:
2358 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2359 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2361 				break;
2362 			case 14:
2363 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2364 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2365 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2367 				break;
2368 			case 16:
2369 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2370 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2371 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2372 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2373 				break;
2374 			case 17:
2375 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2376 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2377 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2379 				break;
2380 			case 27:
2381 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2382 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2384 				break;
2385 			case 28:
2386 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2388 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2390 				break;
2391 			case 29:
2392 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2393 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2394 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2395 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2396 				break;
2397 			case 30:
2398 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2399 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2400 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2402 				break;
2403 			default:
2404 				gb_tile_moden = 0;
2405 				break;
2406 			}
2407 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2408 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2409 		}
2410 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2411 			switch (reg_offset) {
2412 			case 0:
2413 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2414 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2415 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2416 						 NUM_BANKS(ADDR_SURF_16_BANK));
2417 				break;
2418 			case 1:
2419 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2420 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2421 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2422 						 NUM_BANKS(ADDR_SURF_16_BANK));
2423 				break;
2424 			case 2:
2425 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2426 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2427 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2428 						 NUM_BANKS(ADDR_SURF_16_BANK));
2429 				break;
2430 			case 3:
2431 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2433 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2434 						 NUM_BANKS(ADDR_SURF_16_BANK));
2435 				break;
2436 			case 4:
2437 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2439 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2440 						 NUM_BANKS(ADDR_SURF_8_BANK));
2441 				break;
2442 			case 5:
2443 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2444 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2445 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2446 						 NUM_BANKS(ADDR_SURF_4_BANK));
2447 				break;
2448 			case 6:
2449 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2451 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2452 						 NUM_BANKS(ADDR_SURF_2_BANK));
2453 				break;
2454 			case 8:
2455 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2456 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2457 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2458 						 NUM_BANKS(ADDR_SURF_16_BANK));
2459 				break;
2460 			case 9:
2461 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2463 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2464 						 NUM_BANKS(ADDR_SURF_16_BANK));
2465 				break;
2466 			case 10:
2467 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2469 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2470 						 NUM_BANKS(ADDR_SURF_16_BANK));
2471 				break;
2472 			case 11:
2473 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476 						 NUM_BANKS(ADDR_SURF_8_BANK));
2477 				break;
2478 			case 12:
2479 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2481 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2482 						 NUM_BANKS(ADDR_SURF_4_BANK));
2483 				break;
2484 			case 13:
2485 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488 						 NUM_BANKS(ADDR_SURF_2_BANK));
2489 				break;
2490 			case 14:
2491 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2493 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2494 						 NUM_BANKS(ADDR_SURF_2_BANK));
2495 				break;
2496 			default:
2497 				gb_tile_moden = 0;
2498 				break;
2499 			}
2500 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2501 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2502 		}
2503 	} else if (num_pipe_configs == 8) {
2504 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2505 			switch (reg_offset) {
2506 			case 0:
2507 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2508 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2509 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2510 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2511 				break;
2512 			case 1:
2513 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2515 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2517 				break;
2518 			case 2:
2519 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2520 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2521 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2523 				break;
2524 			case 3:
2525 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2529 				break;
2530 			case 4:
2531 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2532 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2533 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534 						 TILE_SPLIT(split_equal_to_row_size));
2535 				break;
2536 			case 5:
2537 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2538 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2540 				break;
2541 			case 6:
2542 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2543 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2544 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2545 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2546 				break;
2547 			case 7:
2548 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2549 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2550 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551 						 TILE_SPLIT(split_equal_to_row_size));
2552 				break;
2553 			case 8:
2554 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2555 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2556 				break;
2557 			case 9:
2558 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2559 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2561 				break;
2562 			case 10:
2563 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2565 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2567 				break;
2568 			case 11:
2569 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2570 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2571 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2572 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2573 				break;
2574 			case 12:
2575 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2576 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2577 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2579 				break;
2580 			case 13:
2581 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2582 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2584 				break;
2585 			case 14:
2586 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2587 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2588 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2590 				break;
2591 			case 16:
2592 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2593 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2594 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2595 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2596 				break;
2597 			case 17:
2598 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2599 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2600 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2601 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2602 				break;
2603 			case 27:
2604 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2605 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2607 				break;
2608 			case 28:
2609 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2611 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2613 				break;
2614 			case 29:
2615 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2616 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2617 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2618 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2619 				break;
2620 			case 30:
2621 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2622 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2623 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2625 				break;
2626 			default:
2627 				gb_tile_moden = 0;
2628 				break;
2629 			}
2630 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2631 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2632 		}
2633 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2634 			switch (reg_offset) {
2635 			case 0:
2636 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2638 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2639 						 NUM_BANKS(ADDR_SURF_16_BANK));
2640 				break;
2641 			case 1:
2642 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2644 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2645 						 NUM_BANKS(ADDR_SURF_16_BANK));
2646 				break;
2647 			case 2:
2648 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2649 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2650 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2651 						 NUM_BANKS(ADDR_SURF_16_BANK));
2652 				break;
2653 			case 3:
2654 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2656 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2657 						 NUM_BANKS(ADDR_SURF_16_BANK));
2658 				break;
2659 			case 4:
2660 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2662 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2663 						 NUM_BANKS(ADDR_SURF_8_BANK));
2664 				break;
2665 			case 5:
2666 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2668 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2669 						 NUM_BANKS(ADDR_SURF_4_BANK));
2670 				break;
2671 			case 6:
2672 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2673 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2674 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2675 						 NUM_BANKS(ADDR_SURF_2_BANK));
2676 				break;
2677 			case 8:
2678 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2680 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2681 						 NUM_BANKS(ADDR_SURF_16_BANK));
2682 				break;
2683 			case 9:
2684 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2685 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2686 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2687 						 NUM_BANKS(ADDR_SURF_16_BANK));
2688 				break;
2689 			case 10:
2690 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2692 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2693 						 NUM_BANKS(ADDR_SURF_16_BANK));
2694 				break;
2695 			case 11:
2696 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2698 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2699 						 NUM_BANKS(ADDR_SURF_16_BANK));
2700 				break;
2701 			case 12:
2702 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2704 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2705 						 NUM_BANKS(ADDR_SURF_8_BANK));
2706 				break;
2707 			case 13:
2708 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2711 						 NUM_BANKS(ADDR_SURF_4_BANK));
2712 				break;
2713 			case 14:
2714 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2717 						 NUM_BANKS(ADDR_SURF_2_BANK));
2718 				break;
2719 			default:
2720 				gb_tile_moden = 0;
2721 				break;
2722 			}
2723 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2724 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2725 		}
2726 	} else if (num_pipe_configs == 4) {
2727 		if (num_rbs == 4) {
2728 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2729 				switch (reg_offset) {
2730 				case 0:
2731 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2732 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2733 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2735 					break;
2736 				case 1:
2737 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2739 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2740 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2741 					break;
2742 				case 2:
2743 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2744 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2745 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2747 					break;
2748 				case 3:
2749 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2753 					break;
2754 				case 4:
2755 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2756 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2757 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758 							 TILE_SPLIT(split_equal_to_row_size));
2759 					break;
2760 				case 5:
2761 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2762 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2764 					break;
2765 				case 6:
2766 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2767 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2768 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2770 					break;
2771 				case 7:
2772 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2773 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2774 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2775 							 TILE_SPLIT(split_equal_to_row_size));
2776 					break;
2777 				case 8:
2778 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2779 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2780 					break;
2781 				case 9:
2782 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2783 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2784 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2785 					break;
2786 				case 10:
2787 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2788 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2789 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2791 					break;
2792 				case 11:
2793 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2794 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2795 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2796 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2797 					break;
2798 				case 12:
2799 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2800 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2801 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2802 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2803 					break;
2804 				case 13:
2805 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2806 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2807 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2808 					break;
2809 				case 14:
2810 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2811 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2812 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2814 					break;
2815 				case 16:
2816 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2817 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2818 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2819 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2820 					break;
2821 				case 17:
2822 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2823 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2824 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2825 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2826 					break;
2827 				case 27:
2828 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2829 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2830 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2831 					break;
2832 				case 28:
2833 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2834 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2835 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2836 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2837 					break;
2838 				case 29:
2839 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2840 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2841 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2842 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2843 					break;
2844 				case 30:
2845 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2846 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2847 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2849 					break;
2850 				default:
2851 					gb_tile_moden = 0;
2852 					break;
2853 				}
2854 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2855 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2856 			}
2857 		} else if (num_rbs < 4) {
2858 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2859 				switch (reg_offset) {
2860 				case 0:
2861 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2862 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2863 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2864 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2865 					break;
2866 				case 1:
2867 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2868 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2869 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2870 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2871 					break;
2872 				case 2:
2873 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2874 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2875 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2876 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2877 					break;
2878 				case 3:
2879 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2881 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2882 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2883 					break;
2884 				case 4:
2885 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2888 							 TILE_SPLIT(split_equal_to_row_size));
2889 					break;
2890 				case 5:
2891 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2892 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2893 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2894 					break;
2895 				case 6:
2896 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2897 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2899 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2900 					break;
2901 				case 7:
2902 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2903 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2904 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2905 							 TILE_SPLIT(split_equal_to_row_size));
2906 					break;
2907 				case 8:
2908 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2909 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2910 					break;
2911 				case 9:
2912 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2913 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2914 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2915 					break;
2916 				case 10:
2917 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2920 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921 					break;
2922 				case 11:
2923 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2925 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2926 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927 					break;
2928 				case 12:
2929 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2930 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2931 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2932 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933 					break;
2934 				case 13:
2935 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2936 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2937 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2938 					break;
2939 				case 14:
2940 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2941 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2942 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2943 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944 					break;
2945 				case 16:
2946 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2947 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2948 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2949 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2950 					break;
2951 				case 17:
2952 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2953 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2954 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2955 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2956 					break;
2957 				case 27:
2958 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2959 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2960 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2961 					break;
2962 				case 28:
2963 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2964 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2965 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2966 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2967 					break;
2968 				case 29:
2969 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2970 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2971 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2972 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2973 					break;
2974 				case 30:
2975 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2976 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2977 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2978 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2979 					break;
2980 				default:
2981 					gb_tile_moden = 0;
2982 					break;
2983 				}
2984 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2985 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2986 			}
2987 		}
2988 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2989 			switch (reg_offset) {
2990 			case 0:
2991 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2993 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994 						 NUM_BANKS(ADDR_SURF_16_BANK));
2995 				break;
2996 			case 1:
2997 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2998 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2999 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3000 						 NUM_BANKS(ADDR_SURF_16_BANK));
3001 				break;
3002 			case 2:
3003 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3004 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3005 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3006 						 NUM_BANKS(ADDR_SURF_16_BANK));
3007 				break;
3008 			case 3:
3009 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3010 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3011 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3012 						 NUM_BANKS(ADDR_SURF_16_BANK));
3013 				break;
3014 			case 4:
3015 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3017 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3018 						 NUM_BANKS(ADDR_SURF_16_BANK));
3019 				break;
3020 			case 5:
3021 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3023 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3024 						 NUM_BANKS(ADDR_SURF_8_BANK));
3025 				break;
3026 			case 6:
3027 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3029 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3030 						 NUM_BANKS(ADDR_SURF_4_BANK));
3031 				break;
3032 			case 8:
3033 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3034 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3035 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3036 						 NUM_BANKS(ADDR_SURF_16_BANK));
3037 				break;
3038 			case 9:
3039 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3040 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3041 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3042 						 NUM_BANKS(ADDR_SURF_16_BANK));
3043 				break;
3044 			case 10:
3045 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3047 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3048 						 NUM_BANKS(ADDR_SURF_16_BANK));
3049 				break;
3050 			case 11:
3051 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3052 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3053 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3054 						 NUM_BANKS(ADDR_SURF_16_BANK));
3055 				break;
3056 			case 12:
3057 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3058 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3059 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3060 						 NUM_BANKS(ADDR_SURF_16_BANK));
3061 				break;
3062 			case 13:
3063 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3064 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3065 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3066 						 NUM_BANKS(ADDR_SURF_8_BANK));
3067 				break;
3068 			case 14:
3069 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3071 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3072 						 NUM_BANKS(ADDR_SURF_4_BANK));
3073 				break;
3074 			default:
3075 				gb_tile_moden = 0;
3076 				break;
3077 			}
3078 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3079 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3080 		}
3081 	} else if (num_pipe_configs == 2) {
3082 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3083 			switch (reg_offset) {
3084 			case 0:
3085 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3086 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3087 						 PIPE_CONFIG(ADDR_SURF_P2) |
3088 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3089 				break;
3090 			case 1:
3091 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3092 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3093 						 PIPE_CONFIG(ADDR_SURF_P2) |
3094 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3095 				break;
3096 			case 2:
3097 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3098 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3099 						 PIPE_CONFIG(ADDR_SURF_P2) |
3100 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3101 				break;
3102 			case 3:
3103 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3104 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3105 						 PIPE_CONFIG(ADDR_SURF_P2) |
3106 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3107 				break;
3108 			case 4:
3109 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3110 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3111 						 PIPE_CONFIG(ADDR_SURF_P2) |
3112 						 TILE_SPLIT(split_equal_to_row_size));
3113 				break;
3114 			case 5:
3115 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3116 						 PIPE_CONFIG(ADDR_SURF_P2) |
3117 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3118 				break;
3119 			case 6:
3120 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3121 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3122 						 PIPE_CONFIG(ADDR_SURF_P2) |
3123 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3124 				break;
3125 			case 7:
3126 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3127 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3128 						 PIPE_CONFIG(ADDR_SURF_P2) |
3129 						 TILE_SPLIT(split_equal_to_row_size));
3130 				break;
3131 			case 8:
3132 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3133 						PIPE_CONFIG(ADDR_SURF_P2);
3134 				break;
3135 			case 9:
3136 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3137 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3138 						 PIPE_CONFIG(ADDR_SURF_P2));
3139 				break;
3140 			case 10:
3141 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3142 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3143 						 PIPE_CONFIG(ADDR_SURF_P2) |
3144 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3145 				break;
3146 			case 11:
3147 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3148 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3149 						 PIPE_CONFIG(ADDR_SURF_P2) |
3150 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3151 				break;
3152 			case 12:
3153 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3154 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3155 						 PIPE_CONFIG(ADDR_SURF_P2) |
3156 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3157 				break;
3158 			case 13:
3159 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3160 						 PIPE_CONFIG(ADDR_SURF_P2) |
3161 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3162 				break;
3163 			case 14:
3164 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3165 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3166 						 PIPE_CONFIG(ADDR_SURF_P2) |
3167 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3168 				break;
3169 			case 16:
3170 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3171 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3172 						 PIPE_CONFIG(ADDR_SURF_P2) |
3173 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3174 				break;
3175 			case 17:
3176 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3177 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3178 						 PIPE_CONFIG(ADDR_SURF_P2) |
3179 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3180 				break;
3181 			case 27:
3182 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3183 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3184 						 PIPE_CONFIG(ADDR_SURF_P2));
3185 				break;
3186 			case 28:
3187 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3188 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3189 						 PIPE_CONFIG(ADDR_SURF_P2) |
3190 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3191 				break;
3192 			case 29:
3193 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3194 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3195 						 PIPE_CONFIG(ADDR_SURF_P2) |
3196 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3197 				break;
3198 			case 30:
3199 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3200 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3201 						 PIPE_CONFIG(ADDR_SURF_P2) |
3202 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3203 				break;
3204 			default:
3205 				gb_tile_moden = 0;
3206 				break;
3207 			}
3208 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3209 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3210 		}
3211 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3212 			switch (reg_offset) {
3213 			case 0:
3214 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3215 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3216 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3217 						 NUM_BANKS(ADDR_SURF_16_BANK));
3218 				break;
3219 			case 1:
3220 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3221 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3222 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3223 						 NUM_BANKS(ADDR_SURF_16_BANK));
3224 				break;
3225 			case 2:
3226 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3227 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3228 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3229 						 NUM_BANKS(ADDR_SURF_16_BANK));
3230 				break;
3231 			case 3:
3232 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3233 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3234 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235 						 NUM_BANKS(ADDR_SURF_16_BANK));
3236 				break;
3237 			case 4:
3238 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3239 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3240 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3241 						 NUM_BANKS(ADDR_SURF_16_BANK));
3242 				break;
3243 			case 5:
3244 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3245 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3246 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3247 						 NUM_BANKS(ADDR_SURF_16_BANK));
3248 				break;
3249 			case 6:
3250 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3251 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3252 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3253 						 NUM_BANKS(ADDR_SURF_8_BANK));
3254 				break;
3255 			case 8:
3256 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3257 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3258 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3259 						 NUM_BANKS(ADDR_SURF_16_BANK));
3260 				break;
3261 			case 9:
3262 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3263 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3264 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3265 						 NUM_BANKS(ADDR_SURF_16_BANK));
3266 				break;
3267 			case 10:
3268 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3269 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3270 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3271 						 NUM_BANKS(ADDR_SURF_16_BANK));
3272 				break;
3273 			case 11:
3274 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3275 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3276 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3277 						 NUM_BANKS(ADDR_SURF_16_BANK));
3278 				break;
3279 			case 12:
3280 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3281 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3282 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3283 						 NUM_BANKS(ADDR_SURF_16_BANK));
3284 				break;
3285 			case 13:
3286 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3287 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3288 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3289 						 NUM_BANKS(ADDR_SURF_16_BANK));
3290 				break;
3291 			case 14:
3292 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3293 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3294 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3295 						 NUM_BANKS(ADDR_SURF_8_BANK));
3296 				break;
3297 			default:
3298 				gb_tile_moden = 0;
3299 				break;
3300 			}
3301 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3302 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3303 		}
3304 	} else
3305 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3306 }
3307 
3308 /**
3309  * cik_select_se_sh - select which SE, SH to address
3310  *
3311  * @rdev: radeon_device pointer
3312  * @se_num: shader engine to address
3313  * @sh_num: sh block to address
3314  *
3315  * Select which SE, SH combinations to address. Certain
3316  * registers are instanced per SE or SH.  0xffffffff means
3317  * broadcast to all SEs or SHs (CIK).
3318  */
3319 static void cik_select_se_sh(struct radeon_device *rdev,
3320 			     u32 se_num, u32 sh_num)
3321 {
3322 	u32 data = INSTANCE_BROADCAST_WRITES;
3323 
3324 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3325 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3326 	else if (se_num == 0xffffffff)
3327 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3328 	else if (sh_num == 0xffffffff)
3329 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3330 	else
3331 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3332 	WREG32(GRBM_GFX_INDEX, data);
3333 }
3334 
3335 /**
3336  * cik_create_bitmask - create a bitmask
3337  *
3338  * @bit_width: length of the mask
3339  *
3340  * create a variable length bit mask (CIK).
3341  * Returns the bitmask.
3342  */
3343 static u32 cik_create_bitmask(u32 bit_width)
3344 {
3345 	u32 i, mask = 0;
3346 
3347 	for (i = 0; i < bit_width; i++) {
3348 		mask <<= 1;
3349 		mask |= 1;
3350 	}
3351 	return mask;
3352 }
3353 
3354 /**
3355  * cik_get_rb_disabled - computes the mask of disabled RBs
3356  *
3357  * @rdev: radeon_device pointer
3358  * @max_rb_num: max RBs (render backends) for the asic
3359  * @se_num: number of SEs (shader engines) for the asic
3360  * @sh_per_se: number of SH blocks per SE for the asic
3361  *
3362  * Calculates the bitmask of disabled RBs (CIK).
3363  * Returns the disabled RB bitmask.
3364  */
3365 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3366 			      u32 max_rb_num_per_se,
3367 			      u32 sh_per_se)
3368 {
3369 	u32 data, mask;
3370 
3371 	data = RREG32(CC_RB_BACKEND_DISABLE);
3372 	if (data & 1)
3373 		data &= BACKEND_DISABLE_MASK;
3374 	else
3375 		data = 0;
3376 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3377 
3378 	data >>= BACKEND_DISABLE_SHIFT;
3379 
3380 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3381 
3382 	return data & mask;
3383 }
3384 
3385 /**
3386  * cik_setup_rb - setup the RBs on the asic
3387  *
3388  * @rdev: radeon_device pointer
3389  * @se_num: number of SEs (shader engines) for the asic
3390  * @sh_per_se: number of SH blocks per SE for the asic
3391  * @max_rb_num: max RBs (render backends) for the asic
3392  *
3393  * Configures per-SE/SH RB registers (CIK).
3394  */
3395 static void cik_setup_rb(struct radeon_device *rdev,
3396 			 u32 se_num, u32 sh_per_se,
3397 			 u32 max_rb_num_per_se)
3398 {
3399 	int i, j;
3400 	u32 data, mask;
3401 	u32 disabled_rbs = 0;
3402 	u32 enabled_rbs = 0;
3403 
3404 	for (i = 0; i < se_num; i++) {
3405 		for (j = 0; j < sh_per_se; j++) {
3406 			cik_select_se_sh(rdev, i, j);
3407 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3408 			if (rdev->family == CHIP_HAWAII)
3409 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3410 			else
3411 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3412 		}
3413 	}
3414 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3415 
3416 	mask = 1;
3417 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3418 		if (!(disabled_rbs & mask))
3419 			enabled_rbs |= mask;
3420 		mask <<= 1;
3421 	}
3422 
3423 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3424 
3425 	for (i = 0; i < se_num; i++) {
3426 		cik_select_se_sh(rdev, i, 0xffffffff);
3427 		data = 0;
3428 		for (j = 0; j < sh_per_se; j++) {
3429 			switch (enabled_rbs & 3) {
3430 			case 0:
3431 				if (j == 0)
3432 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3433 				else
3434 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3435 				break;
3436 			case 1:
3437 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3438 				break;
3439 			case 2:
3440 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3441 				break;
3442 			case 3:
3443 			default:
3444 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3445 				break;
3446 			}
3447 			enabled_rbs >>= 2;
3448 		}
3449 		WREG32(PA_SC_RASTER_CONFIG, data);
3450 	}
3451 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3452 }
3453 
3454 /**
3455  * cik_gpu_init - setup the 3D engine
3456  *
3457  * @rdev: radeon_device pointer
3458  *
3459  * Configures the 3D engine and tiling configuration
3460  * registers so that the 3D engine is usable.
3461  */
3462 static void cik_gpu_init(struct radeon_device *rdev)
3463 {
3464 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3465 	u32 mc_shared_chmap, mc_arb_ramcfg;
3466 	u32 hdp_host_path_cntl;
3467 	u32 tmp;
3468 	int i, j;
3469 
3470 	switch (rdev->family) {
3471 	case CHIP_BONAIRE:
3472 		rdev->config.cik.max_shader_engines = 2;
3473 		rdev->config.cik.max_tile_pipes = 4;
3474 		rdev->config.cik.max_cu_per_sh = 7;
3475 		rdev->config.cik.max_sh_per_se = 1;
3476 		rdev->config.cik.max_backends_per_se = 2;
3477 		rdev->config.cik.max_texture_channel_caches = 4;
3478 		rdev->config.cik.max_gprs = 256;
3479 		rdev->config.cik.max_gs_threads = 32;
3480 		rdev->config.cik.max_hw_contexts = 8;
3481 
3482 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3483 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3484 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3485 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3486 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3487 		break;
3488 	case CHIP_HAWAII:
3489 		rdev->config.cik.max_shader_engines = 4;
3490 		rdev->config.cik.max_tile_pipes = 16;
3491 		rdev->config.cik.max_cu_per_sh = 11;
3492 		rdev->config.cik.max_sh_per_se = 1;
3493 		rdev->config.cik.max_backends_per_se = 4;
3494 		rdev->config.cik.max_texture_channel_caches = 16;
3495 		rdev->config.cik.max_gprs = 256;
3496 		rdev->config.cik.max_gs_threads = 32;
3497 		rdev->config.cik.max_hw_contexts = 8;
3498 
3499 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3500 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3501 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3502 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3503 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3504 		break;
3505 	case CHIP_KAVERI:
3506 		rdev->config.cik.max_shader_engines = 1;
3507 		rdev->config.cik.max_tile_pipes = 4;
3508 		if ((rdev->pdev->device == 0x1304) ||
3509 		    (rdev->pdev->device == 0x1305) ||
3510 		    (rdev->pdev->device == 0x130C) ||
3511 		    (rdev->pdev->device == 0x130F) ||
3512 		    (rdev->pdev->device == 0x1310) ||
3513 		    (rdev->pdev->device == 0x1311) ||
3514 		    (rdev->pdev->device == 0x131C)) {
3515 			rdev->config.cik.max_cu_per_sh = 8;
3516 			rdev->config.cik.max_backends_per_se = 2;
3517 		} else if ((rdev->pdev->device == 0x1309) ||
3518 			   (rdev->pdev->device == 0x130A) ||
3519 			   (rdev->pdev->device == 0x130D) ||
3520 			   (rdev->pdev->device == 0x1313) ||
3521 			   (rdev->pdev->device == 0x131D)) {
3522 			rdev->config.cik.max_cu_per_sh = 6;
3523 			rdev->config.cik.max_backends_per_se = 2;
3524 		} else if ((rdev->pdev->device == 0x1306) ||
3525 			   (rdev->pdev->device == 0x1307) ||
3526 			   (rdev->pdev->device == 0x130B) ||
3527 			   (rdev->pdev->device == 0x130E) ||
3528 			   (rdev->pdev->device == 0x1315) ||
3529 			   (rdev->pdev->device == 0x1318) ||
3530 			   (rdev->pdev->device == 0x131B)) {
3531 			rdev->config.cik.max_cu_per_sh = 4;
3532 			rdev->config.cik.max_backends_per_se = 1;
3533 		} else {
3534 			rdev->config.cik.max_cu_per_sh = 3;
3535 			rdev->config.cik.max_backends_per_se = 1;
3536 		}
3537 		rdev->config.cik.max_sh_per_se = 1;
3538 		rdev->config.cik.max_texture_channel_caches = 4;
3539 		rdev->config.cik.max_gprs = 256;
3540 		rdev->config.cik.max_gs_threads = 16;
3541 		rdev->config.cik.max_hw_contexts = 8;
3542 
3543 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3544 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3545 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3546 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3547 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3548 		break;
3549 	case CHIP_KABINI:
3550 	case CHIP_MULLINS:
3551 	default:
3552 		rdev->config.cik.max_shader_engines = 1;
3553 		rdev->config.cik.max_tile_pipes = 2;
3554 		rdev->config.cik.max_cu_per_sh = 2;
3555 		rdev->config.cik.max_sh_per_se = 1;
3556 		rdev->config.cik.max_backends_per_se = 1;
3557 		rdev->config.cik.max_texture_channel_caches = 2;
3558 		rdev->config.cik.max_gprs = 256;
3559 		rdev->config.cik.max_gs_threads = 16;
3560 		rdev->config.cik.max_hw_contexts = 8;
3561 
3562 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3563 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3564 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3565 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3566 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3567 		break;
3568 	}
3569 
3570 	/* Initialize HDP */
3571 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3572 		WREG32((0x2c14 + j), 0x00000000);
3573 		WREG32((0x2c18 + j), 0x00000000);
3574 		WREG32((0x2c1c + j), 0x00000000);
3575 		WREG32((0x2c20 + j), 0x00000000);
3576 		WREG32((0x2c24 + j), 0x00000000);
3577 	}
3578 
3579 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3580 
3581 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3582 
3583 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3584 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3585 
3586 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3587 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3588 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3589 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3590 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3591 		rdev->config.cik.mem_row_size_in_kb = 4;
3592 	/* XXX use MC settings? */
3593 	rdev->config.cik.shader_engine_tile_size = 32;
3594 	rdev->config.cik.num_gpus = 1;
3595 	rdev->config.cik.multi_gpu_tile_size = 64;
3596 
3597 	/* fix up row size */
3598 	gb_addr_config &= ~ROW_SIZE_MASK;
3599 	switch (rdev->config.cik.mem_row_size_in_kb) {
3600 	case 1:
3601 	default:
3602 		gb_addr_config |= ROW_SIZE(0);
3603 		break;
3604 	case 2:
3605 		gb_addr_config |= ROW_SIZE(1);
3606 		break;
3607 	case 4:
3608 		gb_addr_config |= ROW_SIZE(2);
3609 		break;
3610 	}
3611 
3612 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3613 	 * not have bank info, so create a custom tiling dword.
3614 	 * bits 3:0   num_pipes
3615 	 * bits 7:4   num_banks
3616 	 * bits 11:8  group_size
3617 	 * bits 15:12 row_size
3618 	 */
3619 	rdev->config.cik.tile_config = 0;
3620 	switch (rdev->config.cik.num_tile_pipes) {
3621 	case 1:
3622 		rdev->config.cik.tile_config |= (0 << 0);
3623 		break;
3624 	case 2:
3625 		rdev->config.cik.tile_config |= (1 << 0);
3626 		break;
3627 	case 4:
3628 		rdev->config.cik.tile_config |= (2 << 0);
3629 		break;
3630 	case 8:
3631 	default:
3632 		/* XXX what about 12? */
3633 		rdev->config.cik.tile_config |= (3 << 0);
3634 		break;
3635 	}
3636 	rdev->config.cik.tile_config |=
3637 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3638 	rdev->config.cik.tile_config |=
3639 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3640 	rdev->config.cik.tile_config |=
3641 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3642 
3643 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3644 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3645 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3646 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3647 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3648 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3649 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3650 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3651 
3652 	cik_tiling_mode_table_init(rdev);
3653 
3654 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3655 		     rdev->config.cik.max_sh_per_se,
3656 		     rdev->config.cik.max_backends_per_se);
3657 
3658 	rdev->config.cik.active_cus = 0;
3659 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3660 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3661 			rdev->config.cik.active_cus +=
3662 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3663 		}
3664 	}
3665 
3666 	/* set HW defaults for 3D engine */
3667 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3668 
3669 	WREG32(SX_DEBUG_1, 0x20);
3670 
3671 	WREG32(TA_CNTL_AUX, 0x00010000);
3672 
3673 	tmp = RREG32(SPI_CONFIG_CNTL);
3674 	tmp |= 0x03000000;
3675 	WREG32(SPI_CONFIG_CNTL, tmp);
3676 
3677 	WREG32(SQ_CONFIG, 1);
3678 
3679 	WREG32(DB_DEBUG, 0);
3680 
3681 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3682 	tmp |= 0x00000400;
3683 	WREG32(DB_DEBUG2, tmp);
3684 
3685 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3686 	tmp |= 0x00020200;
3687 	WREG32(DB_DEBUG3, tmp);
3688 
3689 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3690 	tmp |= 0x00018208;
3691 	WREG32(CB_HW_CONTROL, tmp);
3692 
3693 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3694 
3695 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3696 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3697 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3698 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3699 
3700 	WREG32(VGT_NUM_INSTANCES, 1);
3701 
3702 	WREG32(CP_PERFMON_CNTL, 0);
3703 
3704 	WREG32(SQ_CONFIG, 0);
3705 
3706 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3707 					  FORCE_EOV_MAX_REZ_CNT(255)));
3708 
3709 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3710 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3711 
3712 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3713 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3714 
3715 	tmp = RREG32(HDP_MISC_CNTL);
3716 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3717 	WREG32(HDP_MISC_CNTL, tmp);
3718 
3719 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3720 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3721 
3722 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3723 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3724 
3725 	udelay(50);
3726 }
3727 
3728 /*
3729  * GPU scratch registers helpers function.
3730  */
3731 /**
3732  * cik_scratch_init - setup driver info for CP scratch regs
3733  *
3734  * @rdev: radeon_device pointer
3735  *
3736  * Set up the number and offset of the CP scratch registers.
3737  * NOTE: use of CP scratch registers is a legacy inferface and
3738  * is not used by default on newer asics (r6xx+).  On newer asics,
3739  * memory buffers are used for fences rather than scratch regs.
3740  */
3741 static void cik_scratch_init(struct radeon_device *rdev)
3742 {
3743 	int i;
3744 
3745 	rdev->scratch.num_reg = 7;
3746 	rdev->scratch.reg_base = SCRATCH_REG0;
3747 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3748 		rdev->scratch.free[i] = true;
3749 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3750 	}
3751 }
3752 
3753 /**
3754  * cik_ring_test - basic gfx ring test
3755  *
3756  * @rdev: radeon_device pointer
3757  * @ring: radeon_ring structure holding ring information
3758  *
3759  * Allocate a scratch register and write to it using the gfx ring (CIK).
3760  * Provides a basic gfx ring test to verify that the ring is working.
3761  * Used by cik_cp_gfx_resume();
3762  * Returns 0 on success, error on failure.
3763  */
3764 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3765 {
3766 	uint32_t scratch;
3767 	uint32_t tmp = 0;
3768 	unsigned i;
3769 	int r;
3770 
3771 	r = radeon_scratch_get(rdev, &scratch);
3772 	if (r) {
3773 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3774 		return r;
3775 	}
3776 	WREG32(scratch, 0xCAFEDEAD);
3777 	r = radeon_ring_lock(rdev, ring, 3);
3778 	if (r) {
3779 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3780 		radeon_scratch_free(rdev, scratch);
3781 		return r;
3782 	}
3783 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3784 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3785 	radeon_ring_write(ring, 0xDEADBEEF);
3786 	radeon_ring_unlock_commit(rdev, ring, false);
3787 
3788 	for (i = 0; i < rdev->usec_timeout; i++) {
3789 		tmp = RREG32(scratch);
3790 		if (tmp == 0xDEADBEEF)
3791 			break;
3792 		DRM_UDELAY(1);
3793 	}
3794 	if (i < rdev->usec_timeout) {
3795 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3796 	} else {
3797 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3798 			  ring->idx, scratch, tmp);
3799 		r = -EINVAL;
3800 	}
3801 	radeon_scratch_free(rdev, scratch);
3802 	return r;
3803 }
3804 
3805 /**
3806  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3807  *
3808  * @rdev: radeon_device pointer
3809  * @ridx: radeon ring index
3810  *
3811  * Emits an hdp flush on the cp.
3812  */
3813 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3814 				       int ridx)
3815 {
3816 	struct radeon_ring *ring = &rdev->ring[ridx];
3817 	u32 ref_and_mask;
3818 
3819 	switch (ring->idx) {
3820 	case CAYMAN_RING_TYPE_CP1_INDEX:
3821 	case CAYMAN_RING_TYPE_CP2_INDEX:
3822 	default:
3823 		switch (ring->me) {
3824 		case 0:
3825 			ref_and_mask = CP2 << ring->pipe;
3826 			break;
3827 		case 1:
3828 			ref_and_mask = CP6 << ring->pipe;
3829 			break;
3830 		default:
3831 			return;
3832 		}
3833 		break;
3834 	case RADEON_RING_TYPE_GFX_INDEX:
3835 		ref_and_mask = CP0;
3836 		break;
3837 	}
3838 
3839 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3840 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3841 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3842 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3843 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3844 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3845 	radeon_ring_write(ring, ref_and_mask);
3846 	radeon_ring_write(ring, ref_and_mask);
3847 	radeon_ring_write(ring, 0x20); /* poll interval */
3848 }
3849 
3850 /**
3851  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3852  *
3853  * @rdev: radeon_device pointer
3854  * @fence: radeon fence object
3855  *
3856  * Emits a fence sequnce number on the gfx ring and flushes
3857  * GPU caches.
3858  */
3859 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3860 			     struct radeon_fence *fence)
3861 {
3862 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3863 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3864 
3865 	/* EVENT_WRITE_EOP - flush caches, send int */
3866 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3867 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3868 				 EOP_TC_ACTION_EN |
3869 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3870 				 EVENT_INDEX(5)));
3871 	radeon_ring_write(ring, addr & 0xfffffffc);
3872 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3873 	radeon_ring_write(ring, fence->seq);
3874 	radeon_ring_write(ring, 0);
3875 }
3876 
3877 /**
3878  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3879  *
3880  * @rdev: radeon_device pointer
3881  * @fence: radeon fence object
3882  *
3883  * Emits a fence sequnce number on the compute ring and flushes
3884  * GPU caches.
3885  */
3886 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3887 				 struct radeon_fence *fence)
3888 {
3889 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3890 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3891 
3892 	/* RELEASE_MEM - flush caches, send int */
3893 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3894 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3895 				 EOP_TC_ACTION_EN |
3896 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3897 				 EVENT_INDEX(5)));
3898 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3899 	radeon_ring_write(ring, addr & 0xfffffffc);
3900 	radeon_ring_write(ring, upper_32_bits(addr));
3901 	radeon_ring_write(ring, fence->seq);
3902 	radeon_ring_write(ring, 0);
3903 }
3904 
3905 /**
3906  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3907  *
3908  * @rdev: radeon_device pointer
3909  * @ring: radeon ring buffer object
3910  * @semaphore: radeon semaphore object
3911  * @emit_wait: Is this a sempahore wait?
3912  *
3913  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3914  * from running ahead of semaphore waits.
3915  */
3916 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3917 			     struct radeon_ring *ring,
3918 			     struct radeon_semaphore *semaphore,
3919 			     bool emit_wait)
3920 {
3921 	uint64_t addr = semaphore->gpu_addr;
3922 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3923 
3924 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3925 	radeon_ring_write(ring, lower_32_bits(addr));
3926 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3927 
3928 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3929 		/* Prevent the PFP from running ahead of the semaphore wait */
3930 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3931 		radeon_ring_write(ring, 0x0);
3932 	}
3933 
3934 	return true;
3935 }
3936 
3937 /**
3938  * cik_copy_cpdma - copy pages using the CP DMA engine
3939  *
3940  * @rdev: radeon_device pointer
3941  * @src_offset: src GPU address
3942  * @dst_offset: dst GPU address
3943  * @num_gpu_pages: number of GPU pages to xfer
3944  * @fence: radeon fence object
3945  *
3946  * Copy GPU paging using the CP DMA engine (CIK+).
3947  * Used by the radeon ttm implementation to move pages if
3948  * registered as the asic copy callback.
3949  */
3950 int cik_copy_cpdma(struct radeon_device *rdev,
3951 		   uint64_t src_offset, uint64_t dst_offset,
3952 		   unsigned num_gpu_pages,
3953 		   struct radeon_fence **fence)
3954 {
3955 	struct radeon_semaphore *sem = NULL;
3956 	int ring_index = rdev->asic->copy.blit_ring_index;
3957 	struct radeon_ring *ring = &rdev->ring[ring_index];
3958 	u32 size_in_bytes, cur_size_in_bytes, control;
3959 	int i, num_loops;
3960 	int r = 0;
3961 
3962 	r = radeon_semaphore_create(rdev, &sem);
3963 	if (r) {
3964 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3965 		return r;
3966 	}
3967 
3968 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3969 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3970 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3971 	if (r) {
3972 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3973 		radeon_semaphore_free(rdev, &sem, NULL);
3974 		return r;
3975 	}
3976 
3977 	radeon_semaphore_sync_to(sem, *fence);
3978 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3979 
3980 	for (i = 0; i < num_loops; i++) {
3981 		cur_size_in_bytes = size_in_bytes;
3982 		if (cur_size_in_bytes > 0x1fffff)
3983 			cur_size_in_bytes = 0x1fffff;
3984 		size_in_bytes -= cur_size_in_bytes;
3985 		control = 0;
3986 		if (size_in_bytes == 0)
3987 			control |= PACKET3_DMA_DATA_CP_SYNC;
3988 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3989 		radeon_ring_write(ring, control);
3990 		radeon_ring_write(ring, lower_32_bits(src_offset));
3991 		radeon_ring_write(ring, upper_32_bits(src_offset));
3992 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3993 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3994 		radeon_ring_write(ring, cur_size_in_bytes);
3995 		src_offset += cur_size_in_bytes;
3996 		dst_offset += cur_size_in_bytes;
3997 	}
3998 
3999 	r = radeon_fence_emit(rdev, fence, ring->idx);
4000 	if (r) {
4001 		radeon_ring_unlock_undo(rdev, ring);
4002 		radeon_semaphore_free(rdev, &sem, NULL);
4003 		return r;
4004 	}
4005 
4006 	radeon_ring_unlock_commit(rdev, ring, false);
4007 	radeon_semaphore_free(rdev, &sem, *fence);
4008 
4009 	return r;
4010 }
4011 
4012 /*
4013  * IB stuff
4014  */
4015 /**
4016  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4017  *
4018  * @rdev: radeon_device pointer
4019  * @ib: radeon indirect buffer object
4020  *
4021  * Emits an DE (drawing engine) or CE (constant engine) IB
4022  * on the gfx ring.  IBs are usually generated by userspace
4023  * acceleration drivers and submitted to the kernel for
4024  * sheduling on the ring.  This function schedules the IB
4025  * on the gfx ring for execution by the GPU.
4026  */
4027 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4028 {
4029 	struct radeon_ring *ring = &rdev->ring[ib->ring];
4030 	u32 header, control = INDIRECT_BUFFER_VALID;
4031 
4032 	if (ib->is_const_ib) {
4033 		/* set switch buffer packet before const IB */
4034 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4035 		radeon_ring_write(ring, 0);
4036 
4037 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4038 	} else {
4039 		u32 next_rptr;
4040 		if (ring->rptr_save_reg) {
4041 			next_rptr = ring->wptr + 3 + 4;
4042 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4043 			radeon_ring_write(ring, ((ring->rptr_save_reg -
4044 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
4045 			radeon_ring_write(ring, next_rptr);
4046 		} else if (rdev->wb.enabled) {
4047 			next_rptr = ring->wptr + 5 + 4;
4048 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4049 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4050 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4051 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4052 			radeon_ring_write(ring, next_rptr);
4053 		}
4054 
4055 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4056 	}
4057 
4058 	control |= ib->length_dw |
4059 		(ib->vm ? (ib->vm->id << 24) : 0);
4060 
4061 	radeon_ring_write(ring, header);
4062 	radeon_ring_write(ring,
4063 #ifdef __BIG_ENDIAN
4064 			  (2 << 0) |
4065 #endif
4066 			  (ib->gpu_addr & 0xFFFFFFFC));
4067 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4068 	radeon_ring_write(ring, control);
4069 }
4070 
4071 /**
4072  * cik_ib_test - basic gfx ring IB test
4073  *
4074  * @rdev: radeon_device pointer
4075  * @ring: radeon_ring structure holding ring information
4076  *
4077  * Allocate an IB and execute it on the gfx ring (CIK).
4078  * Provides a basic gfx ring test to verify that IBs are working.
4079  * Returns 0 on success, error on failure.
4080  */
4081 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4082 {
4083 	struct radeon_ib ib;
4084 	uint32_t scratch;
4085 	uint32_t tmp = 0;
4086 	unsigned i;
4087 	int r;
4088 
4089 	r = radeon_scratch_get(rdev, &scratch);
4090 	if (r) {
4091 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4092 		return r;
4093 	}
4094 	WREG32(scratch, 0xCAFEDEAD);
4095 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4096 	if (r) {
4097 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4098 		radeon_scratch_free(rdev, scratch);
4099 		return r;
4100 	}
4101 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4102 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4103 	ib.ptr[2] = 0xDEADBEEF;
4104 	ib.length_dw = 3;
4105 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4106 	if (r) {
4107 		radeon_scratch_free(rdev, scratch);
4108 		radeon_ib_free(rdev, &ib);
4109 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4110 		return r;
4111 	}
4112 	r = radeon_fence_wait(ib.fence, false);
4113 	if (r) {
4114 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4115 		radeon_scratch_free(rdev, scratch);
4116 		radeon_ib_free(rdev, &ib);
4117 		return r;
4118 	}
4119 	for (i = 0; i < rdev->usec_timeout; i++) {
4120 		tmp = RREG32(scratch);
4121 		if (tmp == 0xDEADBEEF)
4122 			break;
4123 		DRM_UDELAY(1);
4124 	}
4125 	if (i < rdev->usec_timeout) {
4126 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4127 	} else {
4128 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4129 			  scratch, tmp);
4130 		r = -EINVAL;
4131 	}
4132 	radeon_scratch_free(rdev, scratch);
4133 	radeon_ib_free(rdev, &ib);
4134 	return r;
4135 }
4136 
4137 /*
4138  * CP.
4139  * On CIK, gfx and compute now have independant command processors.
4140  *
4141  * GFX
4142  * Gfx consists of a single ring and can process both gfx jobs and
4143  * compute jobs.  The gfx CP consists of three microengines (ME):
4144  * PFP - Pre-Fetch Parser
4145  * ME - Micro Engine
4146  * CE - Constant Engine
4147  * The PFP and ME make up what is considered the Drawing Engine (DE).
4148  * The CE is an asynchronous engine used for updating buffer desciptors
4149  * used by the DE so that they can be loaded into cache in parallel
4150  * while the DE is processing state update packets.
4151  *
4152  * Compute
4153  * The compute CP consists of two microengines (ME):
4154  * MEC1 - Compute MicroEngine 1
4155  * MEC2 - Compute MicroEngine 2
4156  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4157  * The queues are exposed to userspace and are programmed directly
4158  * by the compute runtime.
4159  */
4160 /**
4161  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4162  *
4163  * @rdev: radeon_device pointer
4164  * @enable: enable or disable the MEs
4165  *
4166  * Halts or unhalts the gfx MEs.
4167  */
4168 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4169 {
4170 	if (enable)
4171 		WREG32(CP_ME_CNTL, 0);
4172 	else {
4173 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4174 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4175 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4176 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4177 	}
4178 	udelay(50);
4179 }
4180 
4181 /**
4182  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4183  *
4184  * @rdev: radeon_device pointer
4185  *
4186  * Loads the gfx PFP, ME, and CE ucode.
4187  * Returns 0 for success, -EINVAL if the ucode is not available.
4188  */
4189 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4190 {
4191 	int i;
4192 
4193 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4194 		return -EINVAL;
4195 
4196 	cik_cp_gfx_enable(rdev, false);
4197 
4198 	if (rdev->new_fw) {
4199 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4200 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4201 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4202 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4203 		const struct gfx_firmware_header_v1_0 *me_hdr =
4204 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4205 		const __le32 *fw_data;
4206 		u32 fw_size;
4207 
4208 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4209 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4210 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4211 
4212 		/* PFP */
4213 		fw_data = (const __le32 *)
4214 			((const char *)rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4215 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4216 		WREG32(CP_PFP_UCODE_ADDR, 0);
4217 		for (i = 0; i < fw_size; i++)
4218 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4219 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4220 
4221 		/* CE */
4222 		fw_data = (const __le32 *)
4223 			((const char *)rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4224 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4225 		WREG32(CP_CE_UCODE_ADDR, 0);
4226 		for (i = 0; i < fw_size; i++)
4227 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4228 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4229 
4230 		/* ME */
4231 		fw_data = (const __be32 *)
4232 			((const char *)rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4233 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4234 		WREG32(CP_ME_RAM_WADDR, 0);
4235 		for (i = 0; i < fw_size; i++)
4236 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4237 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4238 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4239 	} else {
4240 		const __be32 *fw_data;
4241 
4242 		/* PFP */
4243 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4244 		WREG32(CP_PFP_UCODE_ADDR, 0);
4245 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4246 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4247 		WREG32(CP_PFP_UCODE_ADDR, 0);
4248 
4249 		/* CE */
4250 		fw_data = (const __be32 *)rdev->ce_fw->data;
4251 		WREG32(CP_CE_UCODE_ADDR, 0);
4252 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4253 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4254 		WREG32(CP_CE_UCODE_ADDR, 0);
4255 
4256 		/* ME */
4257 		fw_data = (const __be32 *)rdev->me_fw->data;
4258 		WREG32(CP_ME_RAM_WADDR, 0);
4259 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4260 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4261 		WREG32(CP_ME_RAM_WADDR, 0);
4262 	}
4263 
4264 	return 0;
4265 }
4266 
4267 /**
4268  * cik_cp_gfx_start - start the gfx ring
4269  *
4270  * @rdev: radeon_device pointer
4271  *
4272  * Enables the ring and loads the clear state context and other
4273  * packets required to init the ring.
4274  * Returns 0 for success, error for failure.
4275  */
4276 static int cik_cp_gfx_start(struct radeon_device *rdev)
4277 {
4278 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4279 	int r, i;
4280 
4281 	/* init the CP */
4282 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4283 	WREG32(CP_ENDIAN_SWAP, 0);
4284 	WREG32(CP_DEVICE_ID, 1);
4285 
4286 	cik_cp_gfx_enable(rdev, true);
4287 
4288 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4289 	if (r) {
4290 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4291 		return r;
4292 	}
4293 
4294 	/* init the CE partitions.  CE only used for gfx on CIK */
4295 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4296 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4297 	radeon_ring_write(ring, 0x8000);
4298 	radeon_ring_write(ring, 0x8000);
4299 
4300 	/* setup clear context state */
4301 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4302 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4303 
4304 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4305 	radeon_ring_write(ring, 0x80000000);
4306 	radeon_ring_write(ring, 0x80000000);
4307 
4308 	for (i = 0; i < cik_default_size; i++)
4309 		radeon_ring_write(ring, cik_default_state[i]);
4310 
4311 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4312 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4313 
4314 	/* set clear context state */
4315 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4316 	radeon_ring_write(ring, 0);
4317 
4318 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4319 	radeon_ring_write(ring, 0x00000316);
4320 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4321 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4322 
4323 	radeon_ring_unlock_commit(rdev, ring, false);
4324 
4325 	return 0;
4326 }
4327 
4328 /**
4329  * cik_cp_gfx_fini - stop the gfx ring
4330  *
4331  * @rdev: radeon_device pointer
4332  *
4333  * Stop the gfx ring and tear down the driver ring
4334  * info.
4335  */
4336 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4337 {
4338 	cik_cp_gfx_enable(rdev, false);
4339 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4340 }
4341 
4342 /**
4343  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4344  *
4345  * @rdev: radeon_device pointer
4346  *
4347  * Program the location and size of the gfx ring buffer
4348  * and test it to make sure it's working.
4349  * Returns 0 for success, error for failure.
4350  */
4351 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4352 {
4353 	struct radeon_ring *ring;
4354 	u32 tmp;
4355 	u32 rb_bufsz;
4356 	u64 rb_addr;
4357 	int r;
4358 
4359 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4360 	if (rdev->family != CHIP_HAWAII)
4361 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4362 
4363 	/* Set the write pointer delay */
4364 	WREG32(CP_RB_WPTR_DELAY, 0);
4365 
4366 	/* set the RB to use vmid 0 */
4367 	WREG32(CP_RB_VMID, 0);
4368 
4369 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4370 
4371 	/* ring 0 - compute and gfx */
4372 	/* Set ring buffer size */
4373 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4374 	rb_bufsz = order_base_2(ring->ring_size / 8);
4375 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4376 #ifdef __BIG_ENDIAN
4377 	tmp |= BUF_SWAP_32BIT;
4378 #endif
4379 	WREG32(CP_RB0_CNTL, tmp);
4380 
4381 	/* Initialize the ring buffer's read and write pointers */
4382 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4383 	ring->wptr = 0;
4384 	WREG32(CP_RB0_WPTR, ring->wptr);
4385 
4386 	/* set the wb address wether it's enabled or not */
4387 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4388 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4389 
4390 	/* scratch register shadowing is no longer supported */
4391 	WREG32(SCRATCH_UMSK, 0);
4392 
4393 	if (!rdev->wb.enabled)
4394 		tmp |= RB_NO_UPDATE;
4395 
4396 	mdelay(1);
4397 	WREG32(CP_RB0_CNTL, tmp);
4398 
4399 	rb_addr = ring->gpu_addr >> 8;
4400 	WREG32(CP_RB0_BASE, rb_addr);
4401 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4402 
4403 	/* start the ring */
4404 	cik_cp_gfx_start(rdev);
4405 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4406 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4407 	if (r) {
4408 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4409 		return r;
4410 	}
4411 
4412 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4413 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4414 
4415 	return 0;
4416 }
4417 
4418 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4419 		     struct radeon_ring *ring)
4420 {
4421 	u32 rptr;
4422 
4423 	if (rdev->wb.enabled)
4424 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4425 	else
4426 		rptr = RREG32(CP_RB0_RPTR);
4427 
4428 	return rptr;
4429 }
4430 
4431 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4432 		     struct radeon_ring *ring)
4433 {
4434 	u32 wptr;
4435 
4436 	wptr = RREG32(CP_RB0_WPTR);
4437 
4438 	return wptr;
4439 }
4440 
/**
 * cik_gfx_set_wptr - commit the gfx ring write pointer to the hardware
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Writes ring->wptr to CP_RB0_WPTR, then reads the register back to
 * flush the MMIO write before returning.
 */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	(void)RREG32(CP_RB0_WPTR); /* readback flushes the posted write */
}
4447 
/**
 * cik_compute_get_rptr - get the current compute queue read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Returns the read pointer from the writeback page when writeback is
 * enabled, otherwise reads CP_HQD_PQ_RPTR with the queue's HQD selected
 * via SRBM (under srbm_mutex).
 */
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		/* select this queue's HQD registers before touching them */
		spin_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0); /* restore default select */
		spin_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4465 
/**
 * cik_compute_get_wptr - get the current compute queue write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Returns the write pointer from the writeback page when writeback is
 * enabled, otherwise reads CP_HQD_PQ_WPTR with the queue's HQD selected
 * via SRBM (under srbm_mutex).
 */
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		/* select this queue's HQD registers before touching them */
		spin_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0); /* restore default select */
		spin_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4484 
/**
 * cik_compute_set_wptr - commit the compute queue write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Mirrors ring->wptr into the writeback page and rings the queue's
 * doorbell to notify the CP.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4492 
4493 /**
4494  * cik_cp_compute_enable - enable/disable the compute CP MEs
4495  *
4496  * @rdev: radeon_device pointer
4497  * @enable: enable or disable the MEs
4498  *
4499  * Halts or unhalts the compute MEs.
4500  */
4501 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4502 {
4503 	if (enable)
4504 		WREG32(CP_MEC_CNTL, 0);
4505 	else {
4506 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4507 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4508 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4509 	}
4510 	udelay(50);
4511 }
4512 
4513 /**
4514  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4515  *
4516  * @rdev: radeon_device pointer
4517  *
4518  * Loads the compute MEC1&2 ucode.
4519  * Returns 0 for success, -EINVAL if the ucode is not available.
4520  */
4521 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4522 {
4523 	int i;
4524 
4525 	if (!rdev->mec_fw)
4526 		return -EINVAL;
4527 
4528 	cik_cp_compute_enable(rdev, false);
4529 
4530 	if (rdev->new_fw) {
4531 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4532 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4533 		const __le32 *fw_data;
4534 		u32 fw_size;
4535 
4536 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4537 
4538 		/* MEC1 */
4539 		fw_data = (const __le32 *)
4540 			((const char *)rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4541 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4542 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4543 		for (i = 0; i < fw_size; i++)
4544 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4545 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4546 
4547 		/* MEC2 */
4548 		if (rdev->family == CHIP_KAVERI) {
4549 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4550 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4551 
4552 			fw_data = (const __le32 *)
4553 				((const char *)rdev->mec2_fw->data +
4554 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4555 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4556 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4557 			for (i = 0; i < fw_size; i++)
4558 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4559 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4560 		}
4561 	} else {
4562 		const __be32 *fw_data;
4563 
4564 		/* MEC1 */
4565 		fw_data = (const __be32 *)rdev->mec_fw->data;
4566 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4567 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4568 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4569 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4570 
4571 		if (rdev->family == CHIP_KAVERI) {
4572 			/* MEC2 */
4573 			fw_data = (const __be32 *)rdev->mec_fw->data;
4574 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4575 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4576 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4577 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4578 		}
4579 	}
4580 
4581 	return 0;
4582 }
4583 
4584 /**
4585  * cik_cp_compute_start - start the compute queues
4586  *
4587  * @rdev: radeon_device pointer
4588  *
4589  * Enable the compute queues.
4590  * Returns 0 for success, error for failure.
4591  */
4592 static int cik_cp_compute_start(struct radeon_device *rdev)
4593 {
4594 	cik_cp_compute_enable(rdev, true);
4595 
4596 	return 0;
4597 }
4598 
4599 /**
4600  * cik_cp_compute_fini - stop the compute queues
4601  *
4602  * @rdev: radeon_device pointer
4603  *
4604  * Stop the compute queues and tear down the driver queue
4605  * info.
4606  */
4607 static void cik_cp_compute_fini(struct radeon_device *rdev)
4608 {
4609 	int i, idx, r;
4610 
4611 	cik_cp_compute_enable(rdev, false);
4612 
4613 	for (i = 0; i < 2; i++) {
4614 		if (i == 0)
4615 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4616 		else
4617 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4618 
4619 		if (rdev->ring[idx].mqd_obj) {
4620 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4621 			if (unlikely(r != 0))
4622 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4623 
4624 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4625 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4626 
4627 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4628 			rdev->ring[idx].mqd_obj = NULL;
4629 		}
4630 	}
4631 }
4632 
4633 static void cik_mec_fini(struct radeon_device *rdev)
4634 {
4635 	int r;
4636 
4637 	if (rdev->mec.hpd_eop_obj) {
4638 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4639 		if (unlikely(r != 0))
4640 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4641 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4642 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4643 
4644 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4645 		rdev->mec.hpd_eop_obj = NULL;
4646 	}
4647 }
4648 
4649 #define MEC_HPD_SIZE 2048
4650 
4651 static int cik_mec_init(struct radeon_device *rdev)
4652 {
4653 	int r;
4654 	u32 *hpd;
4655 
4656 	/*
4657 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4658 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4659 	 */
4660 	if (rdev->family == CHIP_KAVERI)
4661 		rdev->mec.num_mec = 2;
4662 	else
4663 		rdev->mec.num_mec = 1;
4664 	rdev->mec.num_pipe = 4;
4665 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4666 
4667 	if (rdev->mec.hpd_eop_obj == NULL) {
4668 		r = radeon_bo_create(rdev,
4669 				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4670 				     PAGE_SIZE, true,
4671 				     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4672 				     &rdev->mec.hpd_eop_obj);
4673 		if (r) {
4674 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4675 			return r;
4676 		}
4677 	}
4678 
4679 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4680 	if (unlikely(r != 0)) {
4681 		cik_mec_fini(rdev);
4682 		return r;
4683 	}
4684 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4685 			  &rdev->mec.hpd_eop_gpu_addr);
4686 	if (r) {
4687 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4688 		cik_mec_fini(rdev);
4689 		return r;
4690 	}
4691 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4692 	if (r) {
4693 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4694 		cik_mec_fini(rdev);
4695 		return r;
4696 	}
4697 
4698 	/* clear memory.  Not sure if this is required or not */
4699 	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4700 
4701 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4702 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4703 
4704 	return 0;
4705 }
4706 
/* Shadow copy of the per-queue CP hardware queue descriptor (HQD)
 * registers programmed by cik_cp_compute_resume().
 * NOTE(review): the field order appears to mirror the CP_HQD_* register
 * block and this struct is embedded in the MQD the CP reads, so do NOT
 * reorder fields — confirm against the CIK register spec before changes.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4745 
/* Memory queue descriptor (MQD) for a CIK compute queue.  An instance is
 * written into a pinned GTT buffer and handed to the CP via
 * CP_MQD_BASE_ADDR in cik_cp_compute_resume().
 * NOTE(review): layout is presumably consumed by CP firmware — do not
 * reorder or resize fields without checking the firmware interface.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;	/* HQD register shadow */
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4773 
4774 /**
4775  * cik_cp_compute_resume - setup the compute queue registers
4776  *
4777  * @rdev: radeon_device pointer
4778  *
4779  * Program the compute queues and test them to make sure they
4780  * are working.
4781  * Returns 0 for success, error for failure.
4782  */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	spin_lock(&rdev->srbm_mutex);
	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
		/* pipes 0-3 live on ME1; pipes 4-7 (KV only) on ME2 */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* each pipe owns a MEC_HPD_SIZE * 2 slice of the EOP bo
		 * allocated in cik_mec_init() */
		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);

		cik_srbm_select(rdev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

		/* set the VMID assigned */
		WREG32(CP_HPD_EOP_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(CP_HPD_EOP_CONTROL);
		tmp &= ~EOP_SIZE_MASK;
		tmp |= order_base_2(MEC_HPD_SIZE / 8);
		WREG32(CP_HPD_EOP_CONTROL, tmp);
	}
	/* restore default SRBM selection before dropping the lock */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	spin_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* lazily allocate the MQD bo; freed in cik_cp_compute_fini() */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		/* enable all compute units for this queue */
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		spin_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* wait (bounded) for the hardware to drain the queue */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		spin_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* mark ready, then verify with a ring test */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
5018 
/* Enable/disable both the gfx CP and the compute MEC together. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
5024 
/* Load the gfx CP ucode first, then the compute MEC ucode.
 * Returns 0 on success or the first failing loader's error code.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret;

	ret = cik_cp_gfx_load_microcode(rdev);
	if (ret)
		return ret;

	return cik_cp_compute_load_microcode(rdev);
}
5038 
/* Tear down both the gfx CP and the compute queues. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
5044 
5045 static int cik_cp_resume(struct radeon_device *rdev)
5046 {
5047 	int r;
5048 
5049 	cik_enable_gui_idle_interrupt(rdev, false);
5050 
5051 	r = cik_cp_load_microcode(rdev);
5052 	if (r)
5053 		return r;
5054 
5055 	r = cik_cp_gfx_resume(rdev);
5056 	if (r)
5057 		return r;
5058 	r = cik_cp_compute_resume(rdev);
5059 	if (r)
5060 		return r;
5061 
5062 	cik_enable_gui_idle_interrupt(rdev, true);
5063 
5064 	return 0;
5065 }
5066 
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log;
 * used as a diagnostic aid around GPU soft reset.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
5106 
5107 /**
5108  * cik_gpu_check_soft_reset - check which blocks are busy
5109  *
5110  * @rdev: radeon_device pointer
5111  *
5112  * Check which blocks are busy and return the relevant reset
5113  * mask to be used by cik_gpu_soft_reset().
5114  * Returns a mask of the blocks to be reset.
5115  */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS: gfx pipeline blocks */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG: note SDMA reports IDLE, so the test is inverted */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS: system blocks (IH, semaphores, GRBM, VM, MC) */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's most likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
5187 
5188 /**
5189  * cik_gpu_soft_reset - soft reset GPU
5190  *
5191  * @rdev: radeon_device pointer
5192  * @reset_mask: mask of which blocks to reset
5193  *
5194  * Soft reset the blocks specified in @reset_mask.
5195  */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* stop memory traffic before pulling the reset lines */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* no MC soft reset on IGPs (carved-out system memory) */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		/* assert, settle, then de-assert; the extra reads flush
		 * the writes through the register bus */
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5318 
/* GMCON register state saved across a KV (IGP) pci config reset;
 * see kv_save_regs_for_reset()/kv_restore_regs_for_reset(). */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5324 
/* Save the GMCON registers and stop the render engine / stutter mode
 * before a pci config reset on KV-class IGPs. */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5336 
/* Replay a fixed GMCON PGFSM config/write sequence after a pci config
 * reset on KV-class IGPs, then restore the registers saved by
 * kv_save_regs_for_reset().
 * NOTE(review): the magic values and their order are presumably
 * hardware-mandated (AMD-provided); do not reorder or "simplify". */
static void kv_restore_regs_for_reset(struct radeon_device *rdev,
				      struct kv_reset_save_regs *save)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x210000);
	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x21003);
	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0);
	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x420000);
	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x120202);
	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);

	for (i = 0; i < 5; i++)
		WREG32(GMCON_PGFSM_WRITE, 0);

	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);

	/* restore the saved register state last */
	WREG32(GMCON_MISC3, save->gmcon_misc3);
	WREG32(GMCON_MISC, save->gmcon_misc);
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
}
5409 
/* Full-device reset through PCI config space; heavier hammer than
 * cik_gpu_soft_reset(), used when the soft reset did not recover. */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGPs need GMCON state preserved across the config reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_disable_busmaster(rdev->pdev->dev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads as
	 * all-ones while the device is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5472 
5473 /**
5474  * cik_asic_reset - soft reset GPU
5475  *
5476  * @rdev: radeon_device pointer
5477  *
5478  * Look up which blocks are hung and attempt
5479  * to reset them.
5480  * Returns 0 for success.
5481  */
5482 int cik_asic_reset(struct radeon_device *rdev)
5483 {
5484 	u32 reset_mask;
5485 
5486 	reset_mask = cik_gpu_check_soft_reset(rdev);
5487 
5488 	if (reset_mask)
5489 		r600_set_bios_scratch_engine_hung(rdev, true);
5490 
5491 	/* try soft reset */
5492 	cik_gpu_soft_reset(rdev, reset_mask);
5493 
5494 	reset_mask = cik_gpu_check_soft_reset(rdev);
5495 
5496 	/* try pci config reset */
5497 	if (reset_mask && radeon_hard_reset)
5498 		cik_gpu_pci_config_reset(rdev);
5499 
5500 	reset_mask = cik_gpu_check_soft_reset(rdev);
5501 
5502 	if (!reset_mask)
5503 		r600_set_bios_scratch_engine_hung(rdev, false);
5504 
5505 	return 0;
5506 }
5507 
5508 /**
5509  * cik_gfx_is_lockup - check if the 3D engine is locked up
5510  *
5511  * @rdev: radeon_device pointer
5512  * @ring: radeon_ring structure holding ring information
5513  *
5514  * Check if the 3D engine is locked up (CIK).
5515  * Returns true if the engine is locked, false if not.
5516  */
5517 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5518 {
5519 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5520 
5521 	if (!(reset_mask & (RADEON_RESET_GFX |
5522 			    RADEON_RESET_COMPUTE |
5523 			    RADEON_RESET_CP))) {
5524 		radeon_ring_lockup_update(rdev, ring);
5525 		return false;
5526 	}
5527 	return radeon_ring_test_lockup(rdev, ring);
5528 }
5529 
5530 /* MC */
5531 /**
5532  * cik_mc_program - program the GPU memory controller
5533  *
5534  * @rdev: radeon_device pointer
5535  *
5536  * Set the location of vram, gart, and AGP in the GPU's
5537  * physical address space (CIK).
5538  */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* quiesce the MC before reprogramming the apertures */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs the 24-bit-shifted top and base addresses */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable the AGP aperture (BOT > TOP) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5586 
5587 /**
5588  * cik_mc_init - initialize the memory controller driver params
5589  *
5590  * @rdev: radeon_device pointer
5591  *
5592  * Look up the amount of vram, vram width, and decide how to place
5593  * vram and gart within the GPU's physical address space (CIK).
5594  * Returns 0 for success.
5595  */
5596 static int cik_mc_init(struct radeon_device *rdev)
5597 {
5598 	u32 tmp;
5599 	int chansize, numchan;
5600 
5601 	/* Get VRAM informations */
5602 	rdev->mc.vram_is_ddr = true;
5603 	tmp = RREG32(MC_ARB_RAMCFG);
5604 	if (tmp & CHANSIZE_MASK) {
5605 		chansize = 64;
5606 	} else {
5607 		chansize = 32;
5608 	}
5609 	tmp = RREG32(MC_SHARED_CHMAP);
5610 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5611 	case 0:
5612 	default:
5613 		numchan = 1;
5614 		break;
5615 	case 1:
5616 		numchan = 2;
5617 		break;
5618 	case 2:
5619 		numchan = 4;
5620 		break;
5621 	case 3:
5622 		numchan = 8;
5623 		break;
5624 	case 4:
5625 		numchan = 3;
5626 		break;
5627 	case 5:
5628 		numchan = 6;
5629 		break;
5630 	case 6:
5631 		numchan = 10;
5632 		break;
5633 	case 7:
5634 		numchan = 12;
5635 		break;
5636 	case 8:
5637 		numchan = 16;
5638 		break;
5639 	}
5640 	rdev->mc.vram_width = numchan * chansize;
5641 	/* Could aper size report 0 ? */
5642 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5643 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5644 	/* size in MB on si */
5645 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5646 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5647 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5648 	si_vram_gtt_location(rdev, &rdev->mc);
5649 	radeon_update_bandwidth_info(rdev);
5650 
5651 	return 0;
5652 }
5653 
5654 /*
5655  * GART
5656  * VMID 0 is the physical GPU addresses as used by the kernel.
5657  * VMIDs 1-15 are used for userspace clients and are handled
5658  * by the radeon vm/hsa code.
5659  */
5660 /**
5661  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5662  *
5663  * @rdev: radeon_device pointer
5664  *
5665  * Flush the TLB for the VMID 0 page table (CIK).
5666  */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache first, so in-flight writes reach memory before
	 * the TLB invalidate is issued */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only bit 0 is set, so this
	 * invalidates context 0 (the kernel GART page table) only */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5675 
5676 /**
5677  * cik_pcie_gart_enable - gart enable
5678  *
5679  * @rdev: radeon_device pointer
5680  *
5681  * This sets up the TLBs, programs the page tables for VMID0,
5682  * sets up the hw for VMIDs 1-15 which are allocated on
5683  * demand, and sets up the global locations for the LDS, GDS,
5684  * and GPUVM for FSA64 clients (CIK).
5685  * Returns 0 for success, errors for failure.
5686  */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	/* the GART page table BO must exist before we can point the MC at it */
	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |	/* NOTE(review): raw field at bit 7, not named in cikd.h — confirm meaning */
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: covers the GART aperture, backed by the page
	 * table pinned above; faults fall back to the dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): raw register offsets 0x15D4-0x15DC cleared here;
	 * they have no symbolic names in cikd.h — confirm intent upstream */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* page-table base registers for contexts 1-7 and 8-15 live in two
	 * separate register banks; addresses saved by cik_pcie_gart_disable() */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* Kaveri APUs must not bypass the VM in the coherency hub */
	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	spin_lock(&rdev->srbm_mutex);
	/* program the per-VMID SH_MEM/SDMA apertures; srbm_mutex serializes
	 * access to the SRBM VMID selector */
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	spin_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5796 
5797 /**
5798  * cik_pcie_gart_disable - gart disable
5799  *
5800  * @rdev: radeon_device pointer
5801  *
5802  * This disables all VM page table (CIK).
5803  */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save the per-VMID page-table base addresses so that
	 * cik_pcie_gart_enable() can restore contexts 1-15 after resume;
	 * contexts 1-7 and 8-15 live in two separate register banks */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control: leave the L1 TLB disabled but still passing
	 * system-aperture accesses through */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache (ENABLE_L2_CACHE deliberately not set) */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* release the pinned page-table BO, paired with the pin in
	 * cik_pcie_gart_enable() */
	radeon_gart_table_vram_unpin(rdev);
}
5835 
5836 /**
5837  * cik_pcie_gart_fini - vm fini callback
5838  *
5839  * @rdev: radeon_device pointer
5840  *
5841  * Tears down the driver GART/VM setup (CIK).
5842  */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* order matters: quiesce the hardware first, then free the
	 * page-table BO, then tear down the gart bookkeeping */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5849 
5850 /* vm parser */
5851 /**
5852  * cik_ib_parse - vm ib_parse callback
5853  *
5854  * @rdev: radeon_device pointer
5855  * @ib: indirect buffer pointer
5856  *
5857  * CIK uses hw IB checking so this is a nop (CIK).
5858  */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* intentional nop: IB validation is done in hardware on CIK,
	 * so the software parser always reports success */
	return 0;
}
5863 
5864 /*
5865  * vm
5866  * VMID 0 is the physical GPU addresses as used by the kernel.
5867  * VMIDs 1-15 are used for userspace clients and are handled
5868  * by the radeon vm/hsa code.
5869  */
5870 /**
5871  * cik_vm_init - cik vm init callback
5872  *
5873  * @rdev: radeon_device pointer
5874  *
5875  * Inits cik specific vm parameters (number of VMs, base of vram for
5876  * VMIDs 1-15) (CIK).
5877  * Returns 0 for success.
5878  */
5879 int cik_vm_init(struct radeon_device *rdev)
5880 {
5881 	/* number of VMs */
5882 	rdev->vm_manager.nvm = 16;
5883 	/* base offset of vram pages */
5884 	if (rdev->flags & RADEON_IS_IGP) {
5885 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5886 		tmp <<= 22;
5887 		rdev->vm_manager.vram_base_offset = tmp;
5888 	} else
5889 		rdev->vm_manager.vram_base_offset = 0;
5890 
5891 	return 0;
5892 }
5893 
5894 /**
5895  * cik_vm_fini - cik vm fini callback
5896  *
5897  * @rdev: radeon_device pointer
5898  *
5899  * Tear down any asic specific VM setup (CIK).
5900  */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* intentionally empty: CIK has no asic-specific VM state to tear down */
}
5904 
5905 /**
5906  * cik_vm_decode_fault - print human readable fault info
5907  *
5908  * @rdev: radeon_device pointer
5909  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5910  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5911  *
5912  * Print human readable fault information (CIK).
5913  */
5914 static void cik_vm_decode_fault(struct radeon_device *rdev,
5915 				u32 status, u32 addr, u32 mc_client)
5916 {
5917 	u32 mc_id;
5918 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5919 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5920 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5921 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5922 
5923 	if (rdev->family == CHIP_HAWAII)
5924 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5925 	else
5926 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5927 
5928 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5929 	       protections, vmid, addr,
5930 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5931 	       block, mc_client, mc_id);
5932 }
5933 
5934 /**
5935  * cik_vm_flush - cik vm flush using the CP
5936  *
5937  * @rdev: radeon_device pointer
5938  *
5939  * Update the page table base and flush the VM TLB
5940  * using the CP (CIK).
5941  */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	/* only the GFX ring has a PFP; compute rings write via the ME */
	int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);

	if (vm == NULL)
		return;

	/* write the new page-directory base for this VMID; contexts 0-7
	 * and 8-15 live in two separate register banks */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs: select the VMID via SRBM_GFX_CNTL, program
	 * its apertures, then switch the selector back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush so page-table updates are visible before the invalidate */
	cik_hdp_flush_cp_ring_emit(rdev, ridx);

	/* bits 0-15 are the VM contexts0-15: invalidate only this VMID */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
6007 
6008 /*
6009  * RLC
6010  * The RLC is a multi-purpose microengine that handles a
6011  * variety of functions, the most important of which is
6012  * the interrupt controller.
6013  */
6014 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6015 					  bool enable)
6016 {
6017 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6018 
6019 	if (enable)
6020 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6021 	else
6022 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6023 	WREG32(CP_INT_CNTL_RING0, tmp);
6024 }
6025 
6026 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6027 {
6028 	u32 tmp;
6029 
6030 	tmp = RREG32(RLC_LB_CNTL);
6031 	if (enable)
6032 		tmp |= LOAD_BALANCE_ENABLE;
6033 	else
6034 		tmp &= ~LOAD_BALANCE_ENABLE;
6035 	WREG32(RLC_LB_CNTL, tmp);
6036 }
6037 
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* poll the CU master-busy bit for every SE/SH combination; each
	 * poll gives up after usec_timeout iterations of 1us */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast selection (all SEs/SHs) */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* then wait for the non-CU masters (SE/GC/TC) to go idle too */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
6062 
6063 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6064 {
6065 	u32 tmp;
6066 
6067 	tmp = RREG32(RLC_CNTL);
6068 	if (tmp != rlc)
6069 		WREG32(RLC_CNTL, rlc);
6070 }
6071 
/* Halt the RLC if it is running and return the previous RLC_CNTL value
 * so the caller can restore it via cik_update_rlc(). */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait (up to usec_timeout us) for the RLC GPM to go idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	/* value before the halt; callers pass it back to cik_update_rlc() */
	return orig;
}
6095 
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* request safe mode from the RLC via the GPR mailbox register */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait for the gfx power/clock status bits to assert */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to acknowledge the request (REQ bit cleared) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
6116 
6117 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6118 {
6119 	u32 tmp;
6120 
6121 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6122 	WREG32(RLC_GPR_REG2, tmp);
6123 }
6124 
6125 /**
6126  * cik_rlc_stop - stop the RLC ME
6127  *
6128  * @rdev: radeon_device pointer
6129  *
6130  * Halt the RLC ME (MicroEngine) (CIK).
6131  */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	/* clear RLC_ENABLE (and everything else) in RLC_CNTL */
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	/* wait until the serdes masters report idle before returning */
	cik_wait_for_rlc_serdes(rdev);
}
6140 
6141 /**
6142  * cik_rlc_start - start the RLC ME
6143  *
6144  * @rdev: radeon_device pointer
6145  *
6146  * Unhalt the RLC ME (MicroEngine) (CIK).
6147  */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* brief settle delay after unhalting the RLC */
	udelay(50);
}
6156 
6157 /**
6158  * cik_rlc_resume - setup the RLC hw
6159  *
6160  * @rdev: radeon_device pointer
6161  *
6162  * Initialize the RLC registers, load the ucode,
6163  * and start the RLC (CIK).
6164  * Returns 0 for success, -EINVAL if the ucode is not available.
6165  */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	/* the RLC microcode must have been fetched by the init path */
	if (!rdev->rlc_fw)
		return -EINVAL;

	cik_rlc_stop(rdev);

	/* disable CG (clear the low two bits of RLC_CGCG_CGLS_CTRL) */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* load-balancing parameters, broadcast to all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware carries a header describing the
		 * little-endian ucode payload */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			((const char *)rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: headerless big-endian blob with a
		 * per-chip fixed size */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
6246 
/* Enable/disable coarse-grain clock gating (CGCG/CGLS) for the GFX block.
 * The serdes programming must happen with the RLC halted, hence the
 * halt/update pairing around it. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		/* restore the pre-halt RLC_CNTL value */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* dummy reads; presumably a settle/flush sequence before
		 * turning CGCG off — NOTE(review): confirm against hw docs */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only touch the register if the enable bits actually changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6282 
/* Enable/disable medium-grain clock gating (MGCG) and the related
 * memory light-sleep / CGTS options for the GFX block.  Serdes writes
 * are bracketed by an RLC halt/restore, as in cik_enable_cgcg(). */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		/* restore the pre-halt RLC_CNTL value */
		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* disable path: force the overrides on and turn off the
		 * RLC/CP memory light-sleep enables */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
6361 
/* Memory-controller clock-gating control registers; cik_enable_mc_ls()
 * and cik_enable_mc_mgcg() toggle MC_LS_ENABLE/MC_CG_ENABLE in each. */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6374 
6375 static void cik_enable_mc_ls(struct radeon_device *rdev,
6376 			     bool enable)
6377 {
6378 	int i;
6379 	u32 orig, data;
6380 
6381 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6382 		orig = data = RREG32(mc_cg_registers[i]);
6383 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6384 			data |= MC_LS_ENABLE;
6385 		else
6386 			data &= ~MC_LS_ENABLE;
6387 		if (data != orig)
6388 			WREG32(mc_cg_registers[i], data);
6389 	}
6390 }
6391 
6392 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6393 			       bool enable)
6394 {
6395 	int i;
6396 	u32 orig, data;
6397 
6398 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6399 		orig = data = RREG32(mc_cg_registers[i]);
6400 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6401 			data |= MC_CG_ENABLE;
6402 		else
6403 			data &= ~MC_CG_ENABLE;
6404 		if (data != orig)
6405 			WREG32(mc_cg_registers[i], data);
6406 	}
6407 }
6408 
6409 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6410 				 bool enable)
6411 {
6412 	u32 orig, data;
6413 
6414 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6415 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6416 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6417 	} else {
6418 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6419 		data |= 0xff000000;
6420 		if (data != orig)
6421 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6422 
6423 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6424 		data |= 0xff000000;
6425 		if (data != orig)
6426 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6427 	}
6428 }
6429 
6430 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6431 				 bool enable)
6432 {
6433 	u32 orig, data;
6434 
6435 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6436 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6437 		data |= 0x100;
6438 		if (orig != data)
6439 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6440 
6441 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6442 		data |= 0x100;
6443 		if (orig != data)
6444 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6445 	} else {
6446 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6447 		data &= ~0x100;
6448 		if (orig != data)
6449 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6450 
6451 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6452 		data &= ~0x100;
6453 		if (orig != data)
6454 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6455 	}
6456 }
6457 
/* Enable/disable medium-grain clock gating for the UVD block. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the value read here is immediately
		 * overwritten — the read is effectively dead and only the
		 * constant 0xfff is written; this matches upstream Linux,
		 * but the read may exist only for a hw access side effect
		 * — confirm before changing */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the low 12 memory-gating bits and the DCM enable */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6483 
6484 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6485 			       bool enable)
6486 {
6487 	u32 orig, data;
6488 
6489 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6490 
6491 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6492 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6493 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6494 	else
6495 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6496 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6497 
6498 	if (orig != data)
6499 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6500 }
6501 
6502 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6503 				bool enable)
6504 {
6505 	u32 orig, data;
6506 
6507 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6508 
6509 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6510 		data &= ~CLOCK_GATING_DIS;
6511 	else
6512 		data |= CLOCK_GATING_DIS;
6513 
6514 	if (orig != data)
6515 		WREG32(HDP_HOST_PATH_CNTL, data);
6516 }
6517 
6518 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6519 			      bool enable)
6520 {
6521 	u32 orig, data;
6522 
6523 	orig = data = RREG32(HDP_MEM_POWER_LS);
6524 
6525 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6526 		data |= HDP_LS_ENABLE;
6527 	else
6528 		data &= ~HDP_LS_ENABLE;
6529 
6530 	if (orig != data)
6531 		WREG32(HDP_MEM_POWER_LS, data);
6532 }
6533 
/* Enable or disable clock gating for the blocks selected in @block
 * (a mask of RADEON_CG_BLOCK_* flags). */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		/* GUI idle interrupts are held off while reprogramming GFX CG */
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! MGCG before CGCG on enable, reverse on disable */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* APUs have no discrete MC to gate */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6581 
static void cik_init_cg(struct radeon_device *rdev)
{

	/* GFX gating first, then UVD internal CG, then the rest —
	 * mirrored in reverse by cik_fini_cg() */
	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6596 
static void cik_fini_cg(struct radeon_device *rdev)
{
	/* disable in the opposite order of cik_init_cg(): peripheral
	 * blocks first, GFX last */
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6607 
6608 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6609 					  bool enable)
6610 {
6611 	u32 data, orig;
6612 
6613 	orig = data = RREG32(RLC_PG_CNTL);
6614 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6615 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6616 	else
6617 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6618 	if (orig != data)
6619 		WREG32(RLC_PG_CNTL, data);
6620 }
6621 
6622 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6623 					  bool enable)
6624 {
6625 	u32 data, orig;
6626 
6627 	orig = data = RREG32(RLC_PG_CNTL);
6628 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6629 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6630 	else
6631 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6632 	if (orig != data)
6633 		WREG32(RLC_PG_CNTL, data);
6634 }
6635 
6636 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6637 {
6638 	u32 data, orig;
6639 
6640 	orig = data = RREG32(RLC_PG_CNTL);
6641 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6642 		data &= ~DISABLE_CP_PG;
6643 	else
6644 		data |= DISABLE_CP_PG;
6645 	if (orig != data)
6646 		WREG32(RLC_PG_CNTL, data);
6647 }
6648 
6649 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6650 {
6651 	u32 data, orig;
6652 
6653 	orig = data = RREG32(RLC_PG_CNTL);
6654 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6655 		data &= ~DISABLE_GDS_PG;
6656 	else
6657 		data |= DISABLE_GDS_PG;
6658 	if (orig != data)
6659 		WREG32(RLC_PG_CNTL, data);
6660 }
6661 
6662 #define CP_ME_TABLE_SIZE    96
6663 #define CP_ME_TABLE_OFFSET  2048
6664 #define CP_MEC_TABLE_OFFSET 4096
6665 
6666 void cik_init_cp_pg_table(struct radeon_device *rdev)
6667 {
6668 	volatile u32 *dst_ptr;
6669 	int me, i, max_me = 4;
6670 	u32 bo_offset = 0;
6671 	u32 table_offset, table_size;
6672 
6673 	if (rdev->family == CHIP_KAVERI)
6674 		max_me = 5;
6675 
6676 	if (rdev->rlc.cp_table_ptr == NULL)
6677 		return;
6678 
6679 	/* write the cp table buffer */
6680 	dst_ptr = rdev->rlc.cp_table_ptr;
6681 	for (me = 0; me < max_me; me++) {
6682 		if (rdev->new_fw) {
6683 			const __le32 *fw_data;
6684 			const struct gfx_firmware_header_v1_0 *hdr;
6685 
6686 			if (me == 0) {
6687 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6688 				fw_data = (const __le32 *)
6689 					((const char *)rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6690 				table_offset = le32_to_cpu(hdr->jt_offset);
6691 				table_size = le32_to_cpu(hdr->jt_size);
6692 			} else if (me == 1) {
6693 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6694 				fw_data = (const __le32 *)
6695 					((const char *)rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6696 				table_offset = le32_to_cpu(hdr->jt_offset);
6697 				table_size = le32_to_cpu(hdr->jt_size);
6698 			} else if (me == 2) {
6699 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6700 				fw_data = (const __le32 *)
6701 					((const char *)rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6702 				table_offset = le32_to_cpu(hdr->jt_offset);
6703 				table_size = le32_to_cpu(hdr->jt_size);
6704 			} else if (me == 3) {
6705 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6706 				fw_data = (const __le32 *)
6707 					((const char *)rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6708 				table_offset = le32_to_cpu(hdr->jt_offset);
6709 				table_size = le32_to_cpu(hdr->jt_size);
6710 			} else {
6711 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6712 				fw_data = (const __le32 *)
6713 					((const char *)rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6714 				table_offset = le32_to_cpu(hdr->jt_offset);
6715 				table_size = le32_to_cpu(hdr->jt_size);
6716 			}
6717 
6718 			for (i = 0; i < table_size; i ++) {
6719 				dst_ptr[bo_offset + i] =
6720 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6721 			}
6722 			bo_offset += table_size;
6723 		} else {
6724 			const __be32 *fw_data;
6725 			table_size = CP_ME_TABLE_SIZE;
6726 
6727 			if (me == 0) {
6728 				fw_data = (const __be32 *)rdev->ce_fw->data;
6729 				table_offset = CP_ME_TABLE_OFFSET;
6730 			} else if (me == 1) {
6731 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6732 				table_offset = CP_ME_TABLE_OFFSET;
6733 			} else if (me == 2) {
6734 				fw_data = (const __be32 *)rdev->me_fw->data;
6735 				table_offset = CP_ME_TABLE_OFFSET;
6736 			} else {
6737 				fw_data = (const __be32 *)rdev->mec_fw->data;
6738 				table_offset = CP_MEC_TABLE_OFFSET;
6739 			}
6740 
6741 			for (i = 0; i < table_size; i ++) {
6742 				dst_ptr[bo_offset + i] =
6743 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6744 			}
6745 			bo_offset += table_size;
6746 		}
6747 	}
6748 }
6749 
/**
 * cik_enable_gfx_cgpg - enable/disable gfx clock/coarse-grain powergating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable gfx powergating
 *
 * Toggles GFX_PG_ENABLE in RLC_PG_CNTL and AUTO_PG_EN in
 * RLC_AUTO_PG_CTRL as a pair; each register is only written back when
 * its value actually changed.  Enabling requires the ASIC to advertise
 * RADEON_PG_SUPPORT_GFX_PG.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result is discarded; this looks like a
		 * deliberate dummy/posting read of DB_RENDER_CONTROL on the
		 * disable path — confirm against hardware docs before removing.
		 */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6779 
6780 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6781 {
6782 	u32 mask = 0, tmp, tmp1;
6783 	int i;
6784 
6785 	cik_select_se_sh(rdev, se, sh);
6786 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6787 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6788 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6789 
6790 	tmp &= 0xffff0000;
6791 
6792 	tmp |= tmp1;
6793 	tmp >>= 16;
6794 
6795 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6796 		mask <<= 1;
6797 		mask |= 1;
6798 	}
6799 
6800 	return (~tmp) & mask;
6801 }
6802 
6803 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6804 {
6805 	u32 i, j, k, active_cu_number = 0;
6806 	u32 mask, counter, cu_bitmap;
6807 	u32 tmp = 0;
6808 
6809 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6810 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6811 			mask = 1;
6812 			cu_bitmap = 0;
6813 			counter = 0;
6814 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6815 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6816 					if (counter < 2)
6817 						cu_bitmap |= mask;
6818 					counter ++;
6819 				}
6820 				mask <<= 1;
6821 			}
6822 
6823 			active_cu_number += counter;
6824 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6825 		}
6826 	}
6827 
6828 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6829 
6830 	tmp = RREG32(RLC_MAX_PG_CU);
6831 	tmp &= ~MAX_PU_CU_MASK;
6832 	tmp |= MAX_PU_CU(active_cu_number);
6833 	WREG32(RLC_MAX_PG_CU, tmp);
6834 }
6835 
/**
 * cik_enable_gfx_static_mgpg - enable/disable static per-CU powergating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable static medium-grain powergating
 *
 * Read-modify-write of STATIC_PER_CU_PG_ENABLE in RLC_PG_CNTL, gated
 * on RADEON_PG_SUPPORT_GFX_SMG; skips the write when unchanged.
 */
static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
				       bool enable)
{
	u32 data, orig;

	orig = data = RREG32(RLC_PG_CNTL);
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
		data |= STATIC_PER_CU_PG_ENABLE;
	else
		data &= ~STATIC_PER_CU_PG_ENABLE;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);
}
6849 
6850 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6851 					bool enable)
6852 {
6853 	u32 data, orig;
6854 
6855 	orig = data = RREG32(RLC_PG_CNTL);
6856 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6857 		data |= DYN_PER_CU_PG_ENABLE;
6858 	else
6859 		data &= ~DYN_PER_CU_PG_ENABLE;
6860 	if (orig != data)
6861 		WREG32(RLC_PG_CNTL, data);
6862 }
6863 
6864 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6865 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6866 
/**
 * cik_init_gfx_cgpg - set up the RLC state needed for gfx powergating
 *
 * @rdev: radeon_device pointer
 *
 * Programs the RLC GPM scratch area with the clear-state descriptor
 * (address hi/lo and size) and the save/restore register list, points
 * the RLC at the save/restore and CP table buffers, and tunes the idle
 * poll count and auto-powergating delays/thresholds.  The
 * RLC_GPM_SCRATCH_ADDR write must precede the streamed
 * RLC_GPM_SCRATCH_DATA writes (the address auto-increments).
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* descriptor: clear state address (hi, lo) then size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero out the 3-word descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* stream the save/restore register list into scratch */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* both buffers are 256-byte aligned, hence the >> 8 */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6915 
/**
 * cik_update_gfx_pg - toggle all gfx powergating features together
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable gfx powergating
 *
 * Applies coarse-grain, static medium-grain and dynamic medium-grain
 * powergating in that order; each helper checks its own support flag.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6922 
6923 u32 cik_get_csb_size(struct radeon_device *rdev)
6924 {
6925 	u32 count = 0;
6926 	const struct cs_section_def *sect = NULL;
6927 	const struct cs_extent_def *ext = NULL;
6928 
6929 	if (rdev->rlc.cs_data == NULL)
6930 		return 0;
6931 
6932 	/* begin clear state */
6933 	count += 2;
6934 	/* context control state */
6935 	count += 3;
6936 
6937 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6938 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6939 			if (sect->id == SECT_CONTEXT)
6940 				count += 2 + ext->reg_count;
6941 			else
6942 				return 0;
6943 		}
6944 	}
6945 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6946 	count += 4;
6947 	/* end clear state */
6948 	count += 2;
6949 	/* clear state */
6950 	count += 2;
6951 
6952 	return count;
6953 }
6954 
/**
 * cik_get_csb_buffer - fill the clear state buffer with PM4 packets
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (little-endian dwords); may be NULL
 *
 * Emits the clear-state sequence consumed by the CP: begin-clear-state
 * preamble, context control, one SET_CONTEXT_REG packet per context
 * extent, a chip-specific PA_SC_RASTER_CONFIG/CONFIG1 pair,
 * end-clear-state preamble, and a final CLEAR_STATE packet.  The packet
 * count must stay in sync with cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* context regs are addressed relative to 0xa000 */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only context sections are supported here */
				return;
			}
		}
	}

	/* chip-specific raster configuration */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
7019 
/**
 * cik_init_pg - initialize and enable powergating
 *
 * @rdev: radeon_device pointer
 *
 * No-op unless some pg_flags are set.  Enables SMU clock slowdown on
 * power up/down, sets up the RLC state plus CP/GDS powergating when
 * gfx PG is supported, programs the always-on CU mask, and finally
 * turns gfx powergating on.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
7034 
7035 static void cik_fini_pg(struct radeon_device *rdev)
7036 {
7037 	if (rdev->pg_flags) {
7038 		cik_update_gfx_pg(rdev, false);
7039 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7040 			cik_enable_cp_pg(rdev, false);
7041 			cik_enable_gds_pg(rdev, false);
7042 		}
7043 	}
7044 }
7045 
7046 /*
7047  * Interrupts
7048  * Starting with r6xx, interrupts are handled via a ring buffer.
7049  * Ring buffers are areas of GPU accessible memory that the GPU
7050  * writes interrupt vectors into and the host reads vectors out of.
7051  * There is a rptr (read pointer) that determines where the
7052  * host is currently reading, and a wptr (write pointer)
7053  * which determines where the GPU has written.  When the
7054  * pointers are equal, the ring is idle.  When the GPU
7055  * writes vectors to the ring buffer, it increments the
7056  * wptr.  When there is an interrupt, the host then starts
7057  * fetching commands and processing them until the pointers are
7058  * equal again at which point it updates the rptr.
7059  */
7060 
7061 /**
7062  * cik_enable_interrupts - Enable the interrupt ring buffer
7063  *
7064  * @rdev: radeon_device pointer
7065  *
7066  * Enable the interrupt ring buffer (CIK).
7067  */
7068 static void cik_enable_interrupts(struct radeon_device *rdev)
7069 {
7070 	u32 ih_cntl = RREG32(IH_CNTL);
7071 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7072 
7073 	ih_cntl |= ENABLE_INTR;
7074 	ih_rb_cntl |= IH_RB_ENABLE;
7075 	WREG32(IH_CNTL, ih_cntl);
7076 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7077 	rdev->ih.enabled = true;
7078 }
7079 
7080 /**
7081  * cik_disable_interrupts - Disable the interrupt ring buffer
7082  *
7083  * @rdev: radeon_device pointer
7084  *
7085  * Disable the interrupt ring buffer (CIK).
7086  */
7087 static void cik_disable_interrupts(struct radeon_device *rdev)
7088 {
7089 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7090 	u32 ih_cntl = RREG32(IH_CNTL);
7091 
7092 	ih_rb_cntl &= ~IH_RB_ENABLE;
7093 	ih_cntl &= ~ENABLE_INTR;
7094 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7095 	WREG32(IH_CNTL, ih_cntl);
7096 	/* set rptr, wptr to 0 */
7097 	WREG32(IH_RB_RPTR, 0);
7098 	WREG32(IH_RB_WPTR, 0);
7099 	rdev->ih.enabled = false;
7100 	rdev->ih.rptr = 0;
7101 }
7102 
7103 /**
7104  * cik_disable_interrupt_state - Disable all interrupt sources
7105  *
7106  * @rdev: radeon_device pointer
7107  *
7108  * Clear all interrupt enable bits used by the driver (CIK).
7109  */
7110 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7111 {
7112 	u32 tmp;
7113 
7114 	/* gfx ring */
7115 	tmp = RREG32(CP_INT_CNTL_RING0) &
7116 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7117 	WREG32(CP_INT_CNTL_RING0, tmp);
7118 	/* sdma */
7119 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7120 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7121 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7122 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7123 	/* compute queues */
7124 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7125 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7126 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7127 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7128 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7129 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7130 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7131 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7132 	/* grbm */
7133 	WREG32(GRBM_INT_CNTL, 0);
7134 	/* vline/vblank, etc. */
7135 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7136 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7137 	if (rdev->num_crtc >= 4) {
7138 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7139 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7140 	}
7141 	if (rdev->num_crtc >= 6) {
7142 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7143 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7144 	}
7145 	/* pflip */
7146 	if (rdev->num_crtc >= 2) {
7147 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7148 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7149 	}
7150 	if (rdev->num_crtc >= 4) {
7151 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7152 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7153 	}
7154 	if (rdev->num_crtc >= 6) {
7155 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7156 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7157 	}
7158 
7159 	/* dac hotplug */
7160 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7161 
7162 	/* digital hotplug */
7163 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7164 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7165 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7166 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7167 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7168 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7169 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7170 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7171 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7172 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7173 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7174 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7175 
7176 }
7177 
7178 /**
7179  * cik_irq_init - init and enable the interrupt ring
7180  *
7181  * @rdev: radeon_device pointer
7182  *
7183  * Allocate a ring buffer for the interrupt controller,
7184  * enable the RLC, disable interrupts, enable the IH
7185  * ring buffer and enable it (CIK).
7186  * Called at device load and reume.
7187  * Returns 0 for success, errors for failure.
7188  */
7189 static int cik_irq_init(struct radeon_device *rdev)
7190 {
7191 	int ret = 0;
7192 	int rb_bufsz;
7193 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7194 
7195 	/* allocate ring */
7196 	ret = r600_ih_ring_alloc(rdev);
7197 	if (ret)
7198 		return ret;
7199 
7200 	/* disable irqs */
7201 	cik_disable_interrupts(rdev);
7202 
7203 	/* init rlc */
7204 	ret = cik_rlc_resume(rdev);
7205 	if (ret) {
7206 		r600_ih_ring_fini(rdev);
7207 		return ret;
7208 	}
7209 
7210 	/* setup interrupt control */
7211 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7212 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7213 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7214 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7215 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7216 	 */
7217 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7218 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7219 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7220 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7221 
7222 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7223 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7224 
7225 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7226 		      IH_WPTR_OVERFLOW_CLEAR |
7227 		      (rb_bufsz << 1));
7228 
7229 	if (rdev->wb.enabled)
7230 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7231 
7232 	/* set the writeback address whether it's enabled or not */
7233 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7234 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7235 
7236 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7237 
7238 	/* set rptr, wptr to 0 */
7239 	WREG32(IH_RB_RPTR, 0);
7240 	WREG32(IH_RB_WPTR, 0);
7241 
7242 	/* Default settings for IH_CNTL (disabled at first) */
7243 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7244 	/* RPTR_REARM only works if msi's are enabled */
7245 	if (rdev->msi_enabled)
7246 		ih_cntl |= RPTR_REARM;
7247 	WREG32(IH_CNTL, ih_cntl);
7248 
7249 	/* force the active interrupt state to all disabled */
7250 	cik_disable_interrupt_state(rdev);
7251 
7252 	pci_enable_busmaster(rdev->pdev->dev);
7253 
7254 	/* enable irqs */
7255 	cik_enable_interrupts(rdev);
7256 
7257 	return ret;
7258 }
7259 
7260 /**
7261  * cik_irq_set - enable/disable interrupt sources
7262  *
7263  * @rdev: radeon_device pointer
7264  *
7265  * Enable interrupt sources on the GPU (vblanks, hpd,
7266  * etc.) (CIK).
7267  * Returns 0 for success, errors for failure.
7268  */
7269 int cik_irq_set(struct radeon_device *rdev)
7270 {
7271 	u32 cp_int_cntl;
7272 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7273 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7274 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7275 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7276 	u32 grbm_int_cntl = 0;
7277 	u32 dma_cntl, dma_cntl1;
7278 	u32 thermal_int;
7279 
7280 	if (!rdev->irq.installed) {
7281 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7282 		return -EINVAL;
7283 	}
7284 	/* don't enable anything if the ih is disabled */
7285 	if (!rdev->ih.enabled) {
7286 		cik_disable_interrupts(rdev);
7287 		/* force the active interrupt state to all disabled */
7288 		cik_disable_interrupt_state(rdev);
7289 		return 0;
7290 	}
7291 
7292 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7293 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7294 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7295 
7296 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
7297 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
7298 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
7299 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
7300 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
7301 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
7302 
7303 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7304 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7305 
7306 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7307 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7308 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7309 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7310 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7311 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7312 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7313 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7314 
7315 	if (rdev->flags & RADEON_IS_IGP)
7316 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
7317 			~(THERM_INTH_MASK | THERM_INTL_MASK);
7318 	else
7319 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
7320 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
7321 
7322 	/* enable CP interrupts on all rings */
7323 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7324 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7325 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7326 	}
7327 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7328 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7329 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7330 		if (ring->me == 1) {
7331 			switch (ring->pipe) {
7332 			case 0:
7333 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7334 				break;
7335 			case 1:
7336 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7337 				break;
7338 			case 2:
7339 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7340 				break;
7341 			case 3:
7342 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7343 				break;
7344 			default:
7345 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7346 				break;
7347 			}
7348 		} else if (ring->me == 2) {
7349 			switch (ring->pipe) {
7350 			case 0:
7351 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7352 				break;
7353 			case 1:
7354 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7355 				break;
7356 			case 2:
7357 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7358 				break;
7359 			case 3:
7360 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7361 				break;
7362 			default:
7363 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7364 				break;
7365 			}
7366 		} else {
7367 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7368 		}
7369 	}
7370 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7371 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7372 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7373 		if (ring->me == 1) {
7374 			switch (ring->pipe) {
7375 			case 0:
7376 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7377 				break;
7378 			case 1:
7379 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7380 				break;
7381 			case 2:
7382 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7383 				break;
7384 			case 3:
7385 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7386 				break;
7387 			default:
7388 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7389 				break;
7390 			}
7391 		} else if (ring->me == 2) {
7392 			switch (ring->pipe) {
7393 			case 0:
7394 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7395 				break;
7396 			case 1:
7397 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7398 				break;
7399 			case 2:
7400 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7401 				break;
7402 			case 3:
7403 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7404 				break;
7405 			default:
7406 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7407 				break;
7408 			}
7409 		} else {
7410 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7411 		}
7412 	}
7413 
7414 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7415 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7416 		dma_cntl |= TRAP_ENABLE;
7417 	}
7418 
7419 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7420 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7421 		dma_cntl1 |= TRAP_ENABLE;
7422 	}
7423 
7424 	if (rdev->irq.crtc_vblank_int[0] ||
7425 	    atomic_read(&rdev->irq.pflip[0])) {
7426 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7427 		crtc1 |= VBLANK_INTERRUPT_MASK;
7428 	}
7429 	if (rdev->irq.crtc_vblank_int[1] ||
7430 	    atomic_read(&rdev->irq.pflip[1])) {
7431 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7432 		crtc2 |= VBLANK_INTERRUPT_MASK;
7433 	}
7434 	if (rdev->irq.crtc_vblank_int[2] ||
7435 	    atomic_read(&rdev->irq.pflip[2])) {
7436 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7437 		crtc3 |= VBLANK_INTERRUPT_MASK;
7438 	}
7439 	if (rdev->irq.crtc_vblank_int[3] ||
7440 	    atomic_read(&rdev->irq.pflip[3])) {
7441 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7442 		crtc4 |= VBLANK_INTERRUPT_MASK;
7443 	}
7444 	if (rdev->irq.crtc_vblank_int[4] ||
7445 	    atomic_read(&rdev->irq.pflip[4])) {
7446 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7447 		crtc5 |= VBLANK_INTERRUPT_MASK;
7448 	}
7449 	if (rdev->irq.crtc_vblank_int[5] ||
7450 	    atomic_read(&rdev->irq.pflip[5])) {
7451 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7452 		crtc6 |= VBLANK_INTERRUPT_MASK;
7453 	}
7454 	if (rdev->irq.hpd[0]) {
7455 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7456 		hpd1 |= DC_HPDx_INT_EN;
7457 	}
7458 	if (rdev->irq.hpd[1]) {
7459 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7460 		hpd2 |= DC_HPDx_INT_EN;
7461 	}
7462 	if (rdev->irq.hpd[2]) {
7463 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7464 		hpd3 |= DC_HPDx_INT_EN;
7465 	}
7466 	if (rdev->irq.hpd[3]) {
7467 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7468 		hpd4 |= DC_HPDx_INT_EN;
7469 	}
7470 	if (rdev->irq.hpd[4]) {
7471 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7472 		hpd5 |= DC_HPDx_INT_EN;
7473 	}
7474 	if (rdev->irq.hpd[5]) {
7475 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7476 		hpd6 |= DC_HPDx_INT_EN;
7477 	}
7478 
7479 	if (rdev->irq.dpm_thermal) {
7480 		DRM_DEBUG("dpm thermal\n");
7481 		if (rdev->flags & RADEON_IS_IGP)
7482 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7483 		else
7484 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7485 	}
7486 
7487 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7488 
7489 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7490 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7491 
7492 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7493 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7494 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7495 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7496 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7497 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7498 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7499 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7500 
7501 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7502 
7503 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7504 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7505 	if (rdev->num_crtc >= 4) {
7506 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7507 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7508 	}
7509 	if (rdev->num_crtc >= 6) {
7510 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7511 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7512 	}
7513 
7514 	if (rdev->num_crtc >= 2) {
7515 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7516 		       GRPH_PFLIP_INT_MASK);
7517 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7518 		       GRPH_PFLIP_INT_MASK);
7519 	}
7520 	if (rdev->num_crtc >= 4) {
7521 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7522 		       GRPH_PFLIP_INT_MASK);
7523 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7524 		       GRPH_PFLIP_INT_MASK);
7525 	}
7526 	if (rdev->num_crtc >= 6) {
7527 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7528 		       GRPH_PFLIP_INT_MASK);
7529 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7530 		       GRPH_PFLIP_INT_MASK);
7531 	}
7532 
7533 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7534 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7535 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7536 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7537 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7538 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7539 
7540 	if (rdev->flags & RADEON_IS_IGP)
7541 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7542 	else
7543 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7544 
7545 	return 0;
7546 }
7547 
7548 /**
7549  * cik_irq_ack - ack interrupt sources
7550  *
7551  * @rdev: radeon_device pointer
7552  *
7553  * Ack interrupt sources on the GPU (vblanks, hpd,
7554  * etc.) (CIK).  Certain interrupts sources are sw
7555  * generated and do not require an explicit ack.
7556  */
7557 static inline void cik_irq_ack(struct radeon_device *rdev)
7558 {
7559 	u32 tmp;
7560 
7561 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7562 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7563 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7564 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7565 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7566 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7567 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7568 
7569 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7570 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7571 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7572 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7573 	if (rdev->num_crtc >= 4) {
7574 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7575 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7576 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7577 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7578 	}
7579 	if (rdev->num_crtc >= 6) {
7580 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7581 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7582 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7583 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7584 	}
7585 
7586 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7587 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7588 		       GRPH_PFLIP_INT_CLEAR);
7589 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7590 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7591 		       GRPH_PFLIP_INT_CLEAR);
7592 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7593 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7594 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7595 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7596 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7597 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7598 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7599 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7600 
7601 	if (rdev->num_crtc >= 4) {
7602 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7603 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7604 			       GRPH_PFLIP_INT_CLEAR);
7605 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7606 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7607 			       GRPH_PFLIP_INT_CLEAR);
7608 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7609 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7610 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7611 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7612 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7613 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7614 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7615 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7616 	}
7617 
7618 	if (rdev->num_crtc >= 6) {
7619 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7620 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7621 			       GRPH_PFLIP_INT_CLEAR);
7622 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7623 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7624 			       GRPH_PFLIP_INT_CLEAR);
7625 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7626 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7627 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7628 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7629 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7630 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7631 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7632 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7633 	}
7634 
7635 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7636 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7637 		tmp |= DC_HPDx_INT_ACK;
7638 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7639 	}
7640 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7641 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7642 		tmp |= DC_HPDx_INT_ACK;
7643 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7644 	}
7645 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7646 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7647 		tmp |= DC_HPDx_INT_ACK;
7648 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7649 	}
7650 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7651 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7652 		tmp |= DC_HPDx_INT_ACK;
7653 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7654 	}
7655 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7656 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7657 		tmp |= DC_HPDx_INT_ACK;
7658 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7659 	}
7660 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7661 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7662 		tmp |= DC_HPDx_INT_ACK;
7663 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7664 	}
7665 }
7666 
7667 /**
7668  * cik_irq_disable - disable interrupts
7669  *
7670  * @rdev: radeon_device pointer
7671  *
7672  * Disable interrupts on the hw (CIK).
7673  */
static void cik_irq_disable(struct radeon_device *rdev)
{
	/* Master-disable first so no new interrupts are raised while we
	 * tear down the per-source state (see cik_disable_interrupts).
	 */
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* Return the per-source enable registers to their disabled state. */
	cik_disable_interrupt_state(rdev);
}
7682 
7683 /**
 * cik_irq_suspend - disable interrupts for suspend
7685  *
7686  * @rdev: radeon_device pointer
7687  *
7688  * Disable interrupts and stop the RLC (CIK).
7689  * Used for suspend.
7690  */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* Disable and ack any pending interrupts before stopping the RLC. */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7696 
7697 /**
7698  * cik_irq_fini - tear down interrupt support
7699  *
7700  * @rdev: radeon_device pointer
7701  *
7702  * Disable interrupts on the hw and free the IH ring
7703  * buffer (CIK).
7704  * Used for driver unload.
7705  */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* Quiesce the interrupt hardware, then release the IH ring buffer. */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7711 
7712 /**
7713  * cik_get_ih_wptr - get the IH ring buffer wptr
7714  *
7715  * @rdev: radeon_device pointer
7716  *
7717  * Get the IH ring buffer wptr from either the register
7718  * or the writeback memory buffer (CIK).  Also check for
7719  * ring buffer overflow and deal with it.
7720  * Used by cik_irq_process().
7721  * Returns the value of the wptr.
7722  */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	/* Prefer the writeback copy of the wptr when writeback is enabled,
	 * otherwise fall back to an MMIO read of IH_RB_WPTR.
	 */
	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catchup.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		/* Clear the overflow flag so the next overflow is reported again. */
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	/* Mask to the ring size; wptr/rptr are byte offsets into the IH ring. */
	return (wptr & rdev->ih.ptr_mask);
}
7747 
7748 /*        CIK IV Ring
7749  * Each IV ring entry is 128 bits:
7750  * [7:0]    - interrupt source id
7751  * [31:8]   - reserved
7752  * [59:32]  - interrupt source data
7753  * [63:60]  - reserved
7754  * [71:64]  - RINGID
7755  *            CP:
7756  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7757  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7758  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7759  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7760  *            PIPE_ID - ME0 0=3D
7761  *                    - ME1&2 compute dispatcher (4 pipes each)
7762  *            SDMA:
7763  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7764  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7765  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7766  * [79:72]  - VMID
7767  * [95:80]  - PASID
7768  * [127:96] - reserved
7769  */
7770 /**
7771  * cik_irq_process - interrupt handler
7772  *
7773  * @rdev: radeon_device pointer
7774  *
 * Interrupt handler (CIK).  Walk the IH ring,
7776  * ack interrupts and schedule work to handle
7777  * interrupt events.
7778  * Returns irq process return code.
7779  */
7780 irqreturn_t cik_irq_process(struct radeon_device *rdev)
7781 {
7782 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7783 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7784 	u32 wptr;
7785 	u32 rptr;
7786 	u32 src_id, src_data, ring_id;
7787 	u8 me_id, pipe_id, queue_id;
7788 	u32 ring_index;
7789 	bool queue_hotplug = false;
7790 	bool queue_reset = false;
7791 	u32 addr, status, mc_client;
7792 	bool queue_thermal = false;
7793 
7794 	if (!rdev->ih.enabled || rdev->shutdown)
7795 		return IRQ_NONE;
7796 
7797 	wptr = cik_get_ih_wptr(rdev);
7798 
7799 restart_ih:
7800 	/* is somebody else already processing irqs? */
7801 	if (atomic_xchg(&rdev->ih.lock, 1))
7802 		return IRQ_NONE;
7803 
7804 	rptr = rdev->ih.rptr;
7805 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7806 
7807 	/* Order reading of wptr vs. reading of IH ring data */
7808 	rmb();
7809 
7810 	/* display interrupts */
7811 	cik_irq_ack(rdev);
7812 
7813 	while (rptr != wptr) {
7814 		/* wptr/rptr are in bytes! */
7815 		ring_index = rptr / 4;
7816 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7817 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7818 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7819 
7820 		switch (src_id) {
7821 		case 1: /* D1 vblank/vline */
7822 			switch (src_data) {
7823 			case 0: /* D1 vblank */
7824 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7825 					if (rdev->irq.crtc_vblank_int[0]) {
7826 						drm_handle_vblank(rdev->ddev, 0);
7827 						rdev->pm.vblank_sync = true;
7828 						wake_up(&rdev->irq.vblank_queue);
7829 					}
7830 					if (atomic_read(&rdev->irq.pflip[0]))
7831 						radeon_crtc_handle_vblank(rdev, 0);
7832 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7833 					DRM_DEBUG("IH: D1 vblank\n");
7834 				}
7835 				break;
7836 			case 1: /* D1 vline */
7837 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7838 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7839 					DRM_DEBUG("IH: D1 vline\n");
7840 				}
7841 				break;
7842 			default:
7843 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7844 				break;
7845 			}
7846 			break;
7847 		case 2: /* D2 vblank/vline */
7848 			switch (src_data) {
7849 			case 0: /* D2 vblank */
7850 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7851 					if (rdev->irq.crtc_vblank_int[1]) {
7852 						drm_handle_vblank(rdev->ddev, 1);
7853 						rdev->pm.vblank_sync = true;
7854 						wake_up(&rdev->irq.vblank_queue);
7855 					}
7856 					if (atomic_read(&rdev->irq.pflip[1]))
7857 						radeon_crtc_handle_vblank(rdev, 1);
7858 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7859 					DRM_DEBUG("IH: D2 vblank\n");
7860 				}
7861 				break;
7862 			case 1: /* D2 vline */
7863 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7864 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7865 					DRM_DEBUG("IH: D2 vline\n");
7866 				}
7867 				break;
7868 			default:
7869 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7870 				break;
7871 			}
7872 			break;
7873 		case 3: /* D3 vblank/vline */
7874 			switch (src_data) {
7875 			case 0: /* D3 vblank */
7876 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7877 					if (rdev->irq.crtc_vblank_int[2]) {
7878 						drm_handle_vblank(rdev->ddev, 2);
7879 						rdev->pm.vblank_sync = true;
7880 						wake_up(&rdev->irq.vblank_queue);
7881 					}
7882 					if (atomic_read(&rdev->irq.pflip[2]))
7883 						radeon_crtc_handle_vblank(rdev, 2);
7884 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7885 					DRM_DEBUG("IH: D3 vblank\n");
7886 				}
7887 				break;
7888 			case 1: /* D3 vline */
7889 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7890 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7891 					DRM_DEBUG("IH: D3 vline\n");
7892 				}
7893 				break;
7894 			default:
7895 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7896 				break;
7897 			}
7898 			break;
7899 		case 4: /* D4 vblank/vline */
7900 			switch (src_data) {
7901 			case 0: /* D4 vblank */
7902 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7903 					if (rdev->irq.crtc_vblank_int[3]) {
7904 						drm_handle_vblank(rdev->ddev, 3);
7905 						rdev->pm.vblank_sync = true;
7906 						wake_up(&rdev->irq.vblank_queue);
7907 					}
7908 					if (atomic_read(&rdev->irq.pflip[3]))
7909 						radeon_crtc_handle_vblank(rdev, 3);
7910 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7911 					DRM_DEBUG("IH: D4 vblank\n");
7912 				}
7913 				break;
7914 			case 1: /* D4 vline */
7915 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7916 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7917 					DRM_DEBUG("IH: D4 vline\n");
7918 				}
7919 				break;
7920 			default:
7921 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7922 				break;
7923 			}
7924 			break;
7925 		case 5: /* D5 vblank/vline */
7926 			switch (src_data) {
7927 			case 0: /* D5 vblank */
7928 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7929 					if (rdev->irq.crtc_vblank_int[4]) {
7930 						drm_handle_vblank(rdev->ddev, 4);
7931 						rdev->pm.vblank_sync = true;
7932 						wake_up(&rdev->irq.vblank_queue);
7933 					}
7934 					if (atomic_read(&rdev->irq.pflip[4]))
7935 						radeon_crtc_handle_vblank(rdev, 4);
7936 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7937 					DRM_DEBUG("IH: D5 vblank\n");
7938 				}
7939 				break;
7940 			case 1: /* D5 vline */
7941 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7942 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7943 					DRM_DEBUG("IH: D5 vline\n");
7944 				}
7945 				break;
7946 			default:
7947 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7948 				break;
7949 			}
7950 			break;
7951 		case 6: /* D6 vblank/vline */
7952 			switch (src_data) {
7953 			case 0: /* D6 vblank */
7954 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7955 					if (rdev->irq.crtc_vblank_int[5]) {
7956 						drm_handle_vblank(rdev->ddev, 5);
7957 						rdev->pm.vblank_sync = true;
7958 						wake_up(&rdev->irq.vblank_queue);
7959 					}
7960 					if (atomic_read(&rdev->irq.pflip[5]))
7961 						radeon_crtc_handle_vblank(rdev, 5);
7962 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7963 					DRM_DEBUG("IH: D6 vblank\n");
7964 				}
7965 				break;
7966 			case 1: /* D6 vline */
7967 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7968 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7969 					DRM_DEBUG("IH: D6 vline\n");
7970 				}
7971 				break;
7972 			default:
7973 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7974 				break;
7975 			}
7976 			break;
7977 		case 8: /* D1 page flip */
7978 		case 10: /* D2 page flip */
7979 		case 12: /* D3 page flip */
7980 		case 14: /* D4 page flip */
7981 		case 16: /* D5 page flip */
7982 		case 18: /* D6 page flip */
7983 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7984 			if (radeon_use_pflipirq > 0)
7985 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7986 			break;
7987 		case 42: /* HPD hotplug */
7988 			switch (src_data) {
7989 			case 0:
7990 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7991 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7992 					queue_hotplug = true;
7993 					DRM_DEBUG("IH: HPD1\n");
7994 				}
7995 				break;
7996 			case 1:
7997 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7998 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7999 					queue_hotplug = true;
8000 					DRM_DEBUG("IH: HPD2\n");
8001 				}
8002 				break;
8003 			case 2:
8004 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8005 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8006 					queue_hotplug = true;
8007 					DRM_DEBUG("IH: HPD3\n");
8008 				}
8009 				break;
8010 			case 3:
8011 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8012 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8013 					queue_hotplug = true;
8014 					DRM_DEBUG("IH: HPD4\n");
8015 				}
8016 				break;
8017 			case 4:
8018 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8019 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8020 					queue_hotplug = true;
8021 					DRM_DEBUG("IH: HPD5\n");
8022 				}
8023 				break;
8024 			case 5:
8025 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8026 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8027 					queue_hotplug = true;
8028 					DRM_DEBUG("IH: HPD6\n");
8029 				}
8030 				break;
8031 			default:
8032 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8033 				break;
8034 			}
8035 			break;
8036 		case 124: /* UVD */
8037 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8038 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8039 			break;
8040 		case 146:
8041 		case 147:
8042 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8043 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8044 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8045 			/* reset addr and status */
8046 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8047 			if (addr == 0x0 && status == 0x0)
8048 				break;
8049 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8050 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8051 				addr);
8052 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8053 				status);
8054 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8055 			break;
8056 		case 167: /* VCE */
8057 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8058 			switch (src_data) {
8059 			case 0:
8060 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8061 				break;
8062 			case 1:
8063 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8064 				break;
8065 			default:
8066 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8067 				break;
8068 			}
8069 			break;
8070 		case 176: /* GFX RB CP_INT */
8071 		case 177: /* GFX IB CP_INT */
8072 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8073 			break;
8074 		case 181: /* CP EOP event */
8075 			DRM_DEBUG("IH: CP EOP\n");
8076 			/* XXX check the bitfield order! */
8077 			me_id = (ring_id & 0x60) >> 5;
8078 			pipe_id = (ring_id & 0x18) >> 3;
8079 			queue_id = (ring_id & 0x7) >> 0;
8080 			switch (me_id) {
8081 			case 0:
8082 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8083 				break;
8084 			case 1:
8085 			case 2:
8086 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8087 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8088 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8089 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8090 				break;
8091 			}
8092 			break;
8093 		case 184: /* CP Privileged reg access */
8094 			DRM_ERROR("Illegal register access in command stream\n");
8095 			/* XXX check the bitfield order! */
8096 			me_id = (ring_id & 0x60) >> 5;
8097 			pipe_id = (ring_id & 0x18) >> 3;
8098 			queue_id = (ring_id & 0x7) >> 0;
8099 			switch (me_id) {
8100 			case 0:
8101 				/* This results in a full GPU reset, but all we need to do is soft
8102 				 * reset the CP for gfx
8103 				 */
8104 				queue_reset = true;
8105 				break;
8106 			case 1:
8107 				/* XXX compute */
8108 				queue_reset = true;
8109 				break;
8110 			case 2:
8111 				/* XXX compute */
8112 				queue_reset = true;
8113 				break;
8114 			}
8115 			break;
8116 		case 185: /* CP Privileged inst */
8117 			DRM_ERROR("Illegal instruction in command stream\n");
8118 			/* XXX check the bitfield order! */
8119 			me_id = (ring_id & 0x60) >> 5;
8120 			pipe_id = (ring_id & 0x18) >> 3;
8121 			queue_id = (ring_id & 0x7) >> 0;
8122 			switch (me_id) {
8123 			case 0:
8124 				/* This results in a full GPU reset, but all we need to do is soft
8125 				 * reset the CP for gfx
8126 				 */
8127 				queue_reset = true;
8128 				break;
8129 			case 1:
8130 				/* XXX compute */
8131 				queue_reset = true;
8132 				break;
8133 			case 2:
8134 				/* XXX compute */
8135 				queue_reset = true;
8136 				break;
8137 			}
8138 			break;
8139 		case 224: /* SDMA trap event */
8140 			/* XXX check the bitfield order! */
8141 			me_id = (ring_id & 0x3) >> 0;
8142 			queue_id = (ring_id & 0xc) >> 2;
8143 			DRM_DEBUG("IH: SDMA trap\n");
8144 			switch (me_id) {
8145 			case 0:
8146 				switch (queue_id) {
8147 				case 0:
8148 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8149 					break;
8150 				case 1:
8151 					/* XXX compute */
8152 					break;
8153 				case 2:
8154 					/* XXX compute */
8155 					break;
8156 				}
8157 				break;
8158 			case 1:
8159 				switch (queue_id) {
8160 				case 0:
8161 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8162 					break;
8163 				case 1:
8164 					/* XXX compute */
8165 					break;
8166 				case 2:
8167 					/* XXX compute */
8168 					break;
8169 				}
8170 				break;
8171 			}
8172 			break;
8173 		case 230: /* thermal low to high */
8174 			DRM_DEBUG("IH: thermal low to high\n");
8175 			rdev->pm.dpm.thermal.high_to_low = false;
8176 			queue_thermal = true;
8177 			break;
8178 		case 231: /* thermal high to low */
8179 			DRM_DEBUG("IH: thermal high to low\n");
8180 			rdev->pm.dpm.thermal.high_to_low = true;
8181 			queue_thermal = true;
8182 			break;
8183 		case 233: /* GUI IDLE */
8184 			DRM_DEBUG("IH: GUI idle\n");
8185 			break;
8186 		case 241: /* SDMA Privileged inst */
8187 		case 247: /* SDMA Privileged inst */
8188 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8189 			/* XXX check the bitfield order! */
8190 			me_id = (ring_id & 0x3) >> 0;
8191 			queue_id = (ring_id & 0xc) >> 2;
8192 			switch (me_id) {
8193 			case 0:
8194 				switch (queue_id) {
8195 				case 0:
8196 					queue_reset = true;
8197 					break;
8198 				case 1:
8199 					/* XXX compute */
8200 					queue_reset = true;
8201 					break;
8202 				case 2:
8203 					/* XXX compute */
8204 					queue_reset = true;
8205 					break;
8206 				}
8207 				break;
8208 			case 1:
8209 				switch (queue_id) {
8210 				case 0:
8211 					queue_reset = true;
8212 					break;
8213 				case 1:
8214 					/* XXX compute */
8215 					queue_reset = true;
8216 					break;
8217 				case 2:
8218 					/* XXX compute */
8219 					queue_reset = true;
8220 					break;
8221 				}
8222 				break;
8223 			}
8224 			break;
8225 		default:
8226 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8227 			break;
8228 		}
8229 
8230 		/* wptr/rptr are in bytes! */
8231 		rptr += 16;
8232 		rptr &= rdev->ih.ptr_mask;
8233 		WREG32(IH_RB_RPTR, rptr);
8234 	}
8235 	if (queue_hotplug)
8236 		schedule_work(&rdev->hotplug_work);
8237 	if (queue_reset) {
8238 		rdev->needs_reset = true;
8239 		wake_up_all(&rdev->fence_queue);
8240 	}
8241 	if (queue_thermal)
8242 		taskqueue_enqueue(rdev->tq, &rdev->pm.dpm.thermal.work);
8243 	rdev->ih.rptr = rptr;
8244 	atomic_set(&rdev->ih.lock, 0);
8245 
8246 	/* make sure wptr hasn't changed while processing */
8247 	wptr = cik_get_ih_wptr(rdev);
8248 	if (wptr != rptr)
8249 		goto restart_ih;
8250 
8251 	return IRQ_HANDLED;
8252 }
8253 
8254 /*
8255  * startup/shutdown callbacks
8256  */
8257 /**
8258  * cik_startup - program the asic to a functional state
8259  *
8260  * @rdev: radeon_device pointer
8261  *
8262  * Programs the asic to a functional state (CIK).
8263  * Called by cik_init() and cik_resume().
8264  * Returns 0 for success, error for failure.
8265  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* Discrete parts need the MC microcode loaded here unless DPM
	 * already handled it (see condition); IGPs have no MC ucode.
	 */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* KAVERI uses the "spectre" register list, other IGPs
		 * (KABINI/MULLINS) the "kalindi" list.
		 */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* Start the fence driver for every ring we intend to bring up:
	 * gfx, two compute rings, two SDMA rings, then UVD/VCE below.
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE failures are non-fatal: the ring size is zeroed so the
	 * ring-init code below skips them and the rest of the ASIC works.
	 */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	r = radeon_vce_resume(rdev);
	if (!r) {
		r = vce_v2_0_resume(rdev);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE1_INDEX);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE2_INDEX);
	}
	if (r) {
		dev_err(rdev->dev, "VCE init error (%d).\n", r);
		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* Pick the ring nop packet: type-3 NOP for new firmware,
	 * legacy type-2 packet otherwise (Hawaii with old firmware).
	 */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD ring is only initialized if its resume above succeeded
	 * (ring_size left non-zero); failures here are logged, not fatal.
	 */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	/* -ENOENT here means "VCE rings not present", which is silently
	 * accepted below; any other error is reported.
	 */
	r = -ENOENT;

	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	if (!r)
		r = vce_v1_0_init(rdev);
	else if (r != -ENOENT)
		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
8507 
8508 /**
8509  * cik_resume - resume the asic to a functional state
8510  *
8511  * @rdev: radeon_device pointer
8512  *
8513  * Programs the asic to a functional state (CIK).
8514  * Called at resume.
8515  * Returns 0 for success, error for failure.
8516  */
8517 int cik_resume(struct radeon_device *rdev)
8518 {
8519 	int r;
8520 
8521 	/* post card */
8522 	atom_asic_init(rdev->mode_info.atom_context);
8523 
8524 	/* init golden registers */
8525 	cik_init_golden_registers(rdev);
8526 
8527 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8528 		radeon_pm_resume(rdev);
8529 
8530 	rdev->accel_working = true;
8531 	r = cik_startup(rdev);
8532 	if (r) {
8533 		DRM_ERROR("cik startup failed on resume\n");
8534 		rdev->accel_working = false;
8535 		return r;
8536 	}
8537 
8538 	return r;
8539 
8540 }
8541 
8542 /**
8543  * cik_suspend - suspend the asic
8544  *
8545  * @rdev: radeon_device pointer
8546  *
8547  * Bring the chip into a state suitable for suspend (CIK).
8548  * Called at suspend.
8549  * Returns 0 for success.
8550  */
int cik_suspend(struct radeon_device *rdev)
{
	/* Tear down in reverse dependency order: PM and audio first, then
	 * the engines (CP/SDMA/UVD/VCE), then clock/power gating, and
	 * finally interrupts, writeback and the GART.
	 */
	radeon_pm_suspend(rdev);
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8568 
/* Plan is to move initialization into this function and use
 * helper functions so that radeon_device_init pretty much
 * does nothing more than call asic specific functions. This
 * should also allow us to remove a bunch of callback functions
 * like vram_info.
 */
8575 /**
8576  * cik_init - asic specific driver and hw init
8577  *
8578  * @rdev: radeon_device pointer
8579  *
8580  * Setup asic specific driver variables and program the hw
8581  * to a functional state (CIK).
8582  * Called at driver startup.
8583  * Returns 0 for success, errors for failure.
8584  */
8585 int cik_init(struct radeon_device *rdev)
8586 {
8587 	struct radeon_ring *ring;
8588 	int r;
8589 
8590 	/* Read BIOS */
8591 	if (!radeon_get_bios(rdev)) {
8592 		if (ASIC_IS_AVIVO(rdev))
8593 			return -EINVAL;
8594 	}
8595 	/* Must be an ATOMBIOS */
8596 	if (!rdev->is_atom_bios) {
8597 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8598 		return -EINVAL;
8599 	}
8600 	r = radeon_atombios_init(rdev);
8601 	if (r)
8602 		return r;
8603 
8604 	/* Post card if necessary */
8605 	if (!radeon_card_posted(rdev)) {
8606 		if (!rdev->bios) {
8607 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8608 			return -EINVAL;
8609 		}
8610 		DRM_INFO("GPU not posted. posting now...\n");
8611 		atom_asic_init(rdev->mode_info.atom_context);
8612 	}
8613 	/* init golden registers */
8614 	cik_init_golden_registers(rdev);
8615 	/* Initialize scratch registers */
8616 	cik_scratch_init(rdev);
8617 	/* Initialize surface registers */
8618 	radeon_surface_init(rdev);
8619 	/* Initialize clocks */
8620 	radeon_get_clock_info(rdev->ddev);
8621 
8622 	/* Fence driver */
8623 	r = radeon_fence_driver_init(rdev);
8624 	if (r)
8625 		return r;
8626 
8627 	/* initialize memory controller */
8628 	r = cik_mc_init(rdev);
8629 	if (r)
8630 		return r;
8631 	/* Memory manager */
8632 	r = radeon_bo_init(rdev);
8633 	if (r)
8634 		return r;
8635 
8636 	if (rdev->flags & RADEON_IS_IGP) {
8637 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8638 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8639 			r = cik_init_microcode(rdev);
8640 			if (r) {
8641 				DRM_ERROR("Failed to load firmware!\n");
8642 				return r;
8643 			}
8644 		}
8645 	} else {
8646 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8647 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8648 		    !rdev->mc_fw) {
8649 			r = cik_init_microcode(rdev);
8650 			if (r) {
8651 				DRM_ERROR("Failed to load firmware!\n");
8652 				return r;
8653 			}
8654 		}
8655 	}
8656 
8657 	/* Initialize power management */
8658 	radeon_pm_init(rdev);
8659 
8660 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8661 	ring->ring_obj = NULL;
8662 	r600_ring_init(rdev, ring, 1024 * 1024);
8663 
8664 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8665 	ring->ring_obj = NULL;
8666 	r600_ring_init(rdev, ring, 1024 * 1024);
8667 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8668 	if (r)
8669 		return r;
8670 
8671 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8672 	ring->ring_obj = NULL;
8673 	r600_ring_init(rdev, ring, 1024 * 1024);
8674 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8675 	if (r)
8676 		return r;
8677 
8678 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8679 	ring->ring_obj = NULL;
8680 	r600_ring_init(rdev, ring, 256 * 1024);
8681 
8682 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8683 	ring->ring_obj = NULL;
8684 	r600_ring_init(rdev, ring, 256 * 1024);
8685 
8686 	r = radeon_uvd_init(rdev);
8687 	if (!r) {
8688 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8689 		ring->ring_obj = NULL;
8690 		r600_ring_init(rdev, ring, 4096);
8691 	}
8692 
8693 	r = radeon_vce_init(rdev);
8694 	if (!r) {
8695 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8696 		ring->ring_obj = NULL;
8697 		r600_ring_init(rdev, ring, 4096);
8698 
8699 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8700 		ring->ring_obj = NULL;
8701 		r600_ring_init(rdev, ring, 4096);
8702 	}
8703 
8704 	rdev->ih.ring_obj = NULL;
8705 	r600_ih_ring_init(rdev, 64 * 1024);
8706 
8707 	r = r600_pcie_gart_init(rdev);
8708 	if (r)
8709 		return r;
8710 
8711 	rdev->accel_working = true;
8712 	r = cik_startup(rdev);
8713 	if (r) {
8714 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8715 		cik_cp_fini(rdev);
8716 		cik_sdma_fini(rdev);
8717 		cik_irq_fini(rdev);
8718 		sumo_rlc_fini(rdev);
8719 		cik_mec_fini(rdev);
8720 		radeon_wb_fini(rdev);
8721 		radeon_ib_pool_fini(rdev);
8722 		radeon_vm_manager_fini(rdev);
8723 		radeon_irq_kms_fini(rdev);
8724 		cik_pcie_gart_fini(rdev);
8725 		rdev->accel_working = false;
8726 	}
8727 
8728 	/* Don't start up if the MC ucode is missing.
8729 	 * The default clocks and voltages before the MC ucode
8730 	 * is loaded are not suffient for advanced operations.
8731 	 */
8732 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8733 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8734 		return -EINVAL;
8735 	}
8736 
8737 	return 0;
8738 }
8739 
8740 /**
8741  * cik_fini - asic specific driver and hw fini
8742  *
8743  * @rdev: radeon_device pointer
8744  *
8745  * Tear down the asic specific driver variables and program the hw
8746  * to an idle state (CIK).
8747  * Called at driver unload.
8748  */
8749 void cik_fini(struct radeon_device *rdev)
8750 {
8751 	radeon_pm_fini(rdev);
8752 	cik_cp_fini(rdev);
8753 	cik_sdma_fini(rdev);
8754 	cik_fini_pg(rdev);
8755 	cik_fini_cg(rdev);
8756 	cik_irq_fini(rdev);
8757 	sumo_rlc_fini(rdev);
8758 	cik_mec_fini(rdev);
8759 	radeon_wb_fini(rdev);
8760 	radeon_vm_manager_fini(rdev);
8761 	radeon_ib_pool_fini(rdev);
8762 	radeon_irq_kms_fini(rdev);
8763 	uvd_v1_0_fini(rdev);
8764 	radeon_uvd_fini(rdev);
8765 	radeon_vce_fini(rdev);
8766 	cik_pcie_gart_fini(rdev);
8767 	r600_vram_scratch_fini(rdev);
8768 	radeon_gem_fini(rdev);
8769 	radeon_fence_driver_fini(rdev);
8770 	radeon_bo_fini(rdev);
8771 	radeon_atombios_fini(rdev);
8772 	kfree(rdev->bios);
8773 	rdev->bios = NULL;
8774 }
8775 
8776 void dce8_program_fmt(struct drm_encoder *encoder)
8777 {
8778 	struct drm_device *dev = encoder->dev;
8779 	struct radeon_device *rdev = dev->dev_private;
8780 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8781 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8782 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8783 	int bpc = 0;
8784 	u32 tmp = 0;
8785 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8786 
8787 	if (connector) {
8788 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8789 		bpc = radeon_get_monitor_bpc(connector);
8790 		dither = radeon_connector->dither;
8791 	}
8792 
8793 	/* LVDS/eDP FMT is set up by atom */
8794 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8795 		return;
8796 
8797 	/* not needed for analog */
8798 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8799 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8800 		return;
8801 
8802 	if (bpc == 0)
8803 		return;
8804 
8805 	switch (bpc) {
8806 	case 6:
8807 		if (dither == RADEON_FMT_DITHER_ENABLE)
8808 			/* XXX sort out optimal dither settings */
8809 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8810 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8811 		else
8812 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8813 		break;
8814 	case 8:
8815 		if (dither == RADEON_FMT_DITHER_ENABLE)
8816 			/* XXX sort out optimal dither settings */
8817 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8818 				FMT_RGB_RANDOM_ENABLE |
8819 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8820 		else
8821 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8822 		break;
8823 	case 10:
8824 		if (dither == RADEON_FMT_DITHER_ENABLE)
8825 			/* XXX sort out optimal dither settings */
8826 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8827 				FMT_RGB_RANDOM_ENABLE |
8828 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8829 		else
8830 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8831 		break;
8832 	default:
8833 		/* not needed */
8834 		break;
8835 	}
8836 
8837 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8838 }
8839 
8840 /* display watermark setup */
8841 /**
8842  * dce8_line_buffer_adjust - Set up the line buffer
8843  *
8844  * @rdev: radeon_device pointer
8845  * @radeon_crtc: the selected display controller
8846  * @mode: the current display mode on the selected display
8847  * controller
8848  *
8849  * Setup up the line buffer allocation for
8850  * the selected display controller (CIK).
8851  * Returns the line buffer size in pixels.
8852  */
8853 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8854 				   struct radeon_crtc *radeon_crtc,
8855 				   struct drm_display_mode *mode)
8856 {
8857 	u32 tmp, buffer_alloc, i;
8858 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8859 	/*
8860 	 * Line Buffer Setup
8861 	 * There are 6 line buffers, one for each display controllers.
8862 	 * There are 3 partitions per LB. Select the number of partitions
8863 	 * to enable based on the display width.  For display widths larger
8864 	 * than 4096, you need use to use 2 display controllers and combine
8865 	 * them using the stereo blender.
8866 	 */
8867 	if (radeon_crtc->base.enabled && mode) {
8868 		if (mode->crtc_hdisplay < 1920) {
8869 			tmp = 1;
8870 			buffer_alloc = 2;
8871 		} else if (mode->crtc_hdisplay < 2560) {
8872 			tmp = 2;
8873 			buffer_alloc = 2;
8874 		} else if (mode->crtc_hdisplay < 4096) {
8875 			tmp = 0;
8876 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8877 		} else {
8878 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8879 			tmp = 0;
8880 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8881 		}
8882 	} else {
8883 		tmp = 1;
8884 		buffer_alloc = 0;
8885 	}
8886 
8887 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8888 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8889 
8890 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8891 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8892 	for (i = 0; i < rdev->usec_timeout; i++) {
8893 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8894 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8895 			break;
8896 		udelay(1);
8897 	}
8898 
8899 	if (radeon_crtc->base.enabled && mode) {
8900 		switch (tmp) {
8901 		case 0:
8902 		default:
8903 			return 4096 * 2;
8904 		case 1:
8905 			return 1920 * 2;
8906 		case 2:
8907 			return 2560 * 2;
8908 		}
8909 	}
8910 
8911 	/* controller not enabled, so no lb used */
8912 	return 0;
8913 }
8914 
8915 /**
8916  * cik_get_number_of_dram_channels - get the number of dram channels
8917  *
8918  * @rdev: radeon_device pointer
8919  *
8920  * Look up the number of video ram channels (CIK).
8921  * Used for display watermark bandwidth calculations
8922  * Returns the number of dram channels
8923  */
8924 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8925 {
8926 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8927 
8928 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8929 	case 0:
8930 	default:
8931 		return 1;
8932 	case 1:
8933 		return 2;
8934 	case 2:
8935 		return 4;
8936 	case 3:
8937 		return 8;
8938 	case 4:
8939 		return 3;
8940 	case 5:
8941 		return 6;
8942 	case 6:
8943 		return 10;
8944 	case 7:
8945 		return 12;
8946 	case 8:
8947 		return 16;
8948 	}
8949 }
8950 
/* Input bundle for the dce8_* display watermark helpers below.
 * Filled in by dce8_program_watermarks for each active crtc.
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8966 
8967 /**
8968  * dce8_dram_bandwidth - get the dram bandwidth
8969  *
8970  * @wm: watermark calculation data
8971  *
8972  * Calculate the raw dram bandwidth (CIK).
8973  * Used for display watermark bandwidth calculations
8974  * Returns the dram bandwidth in MBytes/s
8975  */
8976 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8977 {
8978 	/* Calculate raw DRAM Bandwidth */
8979 	fixed20_12 dram_efficiency; /* 0.7 */
8980 	fixed20_12 yclk, dram_channels, bandwidth;
8981 	fixed20_12 a;
8982 
8983 	a.full = dfixed_const(1000);
8984 	yclk.full = dfixed_const(wm->yclk);
8985 	yclk.full = dfixed_div(yclk, a);
8986 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8987 	a.full = dfixed_const(10);
8988 	dram_efficiency.full = dfixed_const(7);
8989 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8990 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8991 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8992 
8993 	return dfixed_trunc(bandwidth);
8994 }
8995 
8996 /**
8997  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8998  *
8999  * @wm: watermark calculation data
9000  *
9001  * Calculate the dram bandwidth used for display (CIK).
9002  * Used for display watermark bandwidth calculations
9003  * Returns the dram bandwidth for display in MBytes/s
9004  */
9005 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9006 {
9007 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9008 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9009 	fixed20_12 yclk, dram_channels, bandwidth;
9010 	fixed20_12 a;
9011 
9012 	a.full = dfixed_const(1000);
9013 	yclk.full = dfixed_const(wm->yclk);
9014 	yclk.full = dfixed_div(yclk, a);
9015 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9016 	a.full = dfixed_const(10);
9017 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9018 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9019 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9020 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9021 
9022 	return dfixed_trunc(bandwidth);
9023 }
9024 
9025 /**
9026  * dce8_data_return_bandwidth - get the data return bandwidth
9027  *
9028  * @wm: watermark calculation data
9029  *
9030  * Calculate the data return bandwidth used for display (CIK).
9031  * Used for display watermark bandwidth calculations
9032  * Returns the data return bandwidth in MBytes/s
9033  */
9034 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9035 {
9036 	/* Calculate the display Data return Bandwidth */
9037 	fixed20_12 return_efficiency; /* 0.8 */
9038 	fixed20_12 sclk, bandwidth;
9039 	fixed20_12 a;
9040 
9041 	a.full = dfixed_const(1000);
9042 	sclk.full = dfixed_const(wm->sclk);
9043 	sclk.full = dfixed_div(sclk, a);
9044 	a.full = dfixed_const(10);
9045 	return_efficiency.full = dfixed_const(8);
9046 	return_efficiency.full = dfixed_div(return_efficiency, a);
9047 	a.full = dfixed_const(32);
9048 	bandwidth.full = dfixed_mul(a, sclk);
9049 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9050 
9051 	return dfixed_trunc(bandwidth);
9052 }
9053 
9054 /**
9055  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9056  *
9057  * @wm: watermark calculation data
9058  *
9059  * Calculate the dmif bandwidth used for display (CIK).
9060  * Used for display watermark bandwidth calculations
9061  * Returns the dmif bandwidth in MBytes/s
9062  */
9063 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9064 {
9065 	/* Calculate the DMIF Request Bandwidth */
9066 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9067 	fixed20_12 disp_clk, bandwidth;
9068 	fixed20_12 a, b;
9069 
9070 	a.full = dfixed_const(1000);
9071 	disp_clk.full = dfixed_const(wm->disp_clk);
9072 	disp_clk.full = dfixed_div(disp_clk, a);
9073 	a.full = dfixed_const(32);
9074 	b.full = dfixed_mul(a, disp_clk);
9075 
9076 	a.full = dfixed_const(10);
9077 	disp_clk_request_efficiency.full = dfixed_const(8);
9078 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9079 
9080 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9081 
9082 	return dfixed_trunc(bandwidth);
9083 }
9084 
9085 /**
9086  * dce8_available_bandwidth - get the min available bandwidth
9087  *
9088  * @wm: watermark calculation data
9089  *
9090  * Calculate the min available bandwidth used for display (CIK).
9091  * Used for display watermark bandwidth calculations
9092  * Returns the min available bandwidth in MBytes/s
9093  */
9094 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9095 {
9096 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9097 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9098 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9099 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9100 
9101 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9102 }
9103 
9104 /**
9105  * dce8_average_bandwidth - get the average available bandwidth
9106  *
9107  * @wm: watermark calculation data
9108  *
9109  * Calculate the average available bandwidth used for display (CIK).
9110  * Used for display watermark bandwidth calculations
9111  * Returns the average available bandwidth in MBytes/s
9112  */
9113 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9114 {
9115 	/* Calculate the display mode Average Bandwidth
9116 	 * DisplayMode should contain the source and destination dimensions,
9117 	 * timing, etc.
9118 	 */
9119 	fixed20_12 bpp;
9120 	fixed20_12 line_time;
9121 	fixed20_12 src_width;
9122 	fixed20_12 bandwidth;
9123 	fixed20_12 a;
9124 
9125 	a.full = dfixed_const(1000);
9126 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9127 	line_time.full = dfixed_div(line_time, a);
9128 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9129 	src_width.full = dfixed_const(wm->src_width);
9130 	bandwidth.full = dfixed_mul(src_width, bpp);
9131 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9132 	bandwidth.full = dfixed_div(bandwidth, line_time);
9133 
9134 	return dfixed_trunc(bandwidth);
9135 }
9136 
9137 /**
9138  * dce8_latency_watermark - get the latency watermark
9139  *
9140  * @wm: watermark calculation data
9141  *
9142  * Calculate the latency watermark (CIK).
9143  * Used for display watermark bandwidth calculations
9144  * Returns the latency watermark in ns
9145  */
9146 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9147 {
9148 	/* First calculate the latency in ns */
9149 	u32 mc_latency = 2000; /* 2000 ns. */
9150 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9151 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9152 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9153 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9154 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9155 		(wm->num_heads * cursor_line_pair_return_time);
9156 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9157 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9158 	u32 tmp, dmif_size = 12288;
9159 	fixed20_12 a, b, c;
9160 
9161 	if (wm->num_heads == 0)
9162 		return 0;
9163 
9164 	a.full = dfixed_const(2);
9165 	b.full = dfixed_const(1);
9166 	if ((wm->vsc.full > a.full) ||
9167 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9168 	    (wm->vtaps >= 5) ||
9169 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9170 		max_src_lines_per_dst_line = 4;
9171 	else
9172 		max_src_lines_per_dst_line = 2;
9173 
9174 	a.full = dfixed_const(available_bandwidth);
9175 	b.full = dfixed_const(wm->num_heads);
9176 	a.full = dfixed_div(a, b);
9177 
9178 	b.full = dfixed_const(mc_latency + 512);
9179 	c.full = dfixed_const(wm->disp_clk);
9180 	b.full = dfixed_div(b, c);
9181 
9182 	c.full = dfixed_const(dmif_size);
9183 	b.full = dfixed_div(c, b);
9184 
9185 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9186 
9187 	b.full = dfixed_const(1000);
9188 	c.full = dfixed_const(wm->disp_clk);
9189 	b.full = dfixed_div(c, b);
9190 	c.full = dfixed_const(wm->bytes_per_pixel);
9191 	b.full = dfixed_mul(b, c);
9192 
9193 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9194 
9195 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9196 	b.full = dfixed_const(1000);
9197 	c.full = dfixed_const(lb_fill_bw);
9198 	b.full = dfixed_div(c, b);
9199 	a.full = dfixed_div(a, b);
9200 	line_fill_time = dfixed_trunc(a);
9201 
9202 	if (line_fill_time < wm->active_time)
9203 		return latency;
9204 	else
9205 		return latency + (line_fill_time - wm->active_time);
9206 
9207 }
9208 
9209 /**
9210  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9211  * average and available dram bandwidth
9212  *
9213  * @wm: watermark calculation data
9214  *
9215  * Check if the display average bandwidth fits in the display
9216  * dram bandwidth (CIK).
9217  * Used for display watermark bandwidth calculations
9218  * Returns true if the display fits, false if not.
9219  */
9220 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9221 {
9222 	if (dce8_average_bandwidth(wm) <=
9223 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9224 		return true;
9225 	else
9226 		return false;
9227 }
9228 
9229 /**
9230  * dce8_average_bandwidth_vs_available_bandwidth - check
9231  * average and available bandwidth
9232  *
9233  * @wm: watermark calculation data
9234  *
9235  * Check if the display average bandwidth fits in the display
9236  * available bandwidth (CIK).
9237  * Used for display watermark bandwidth calculations
9238  * Returns true if the display fits, false if not.
9239  */
9240 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9241 {
9242 	if (dce8_average_bandwidth(wm) <=
9243 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9244 		return true;
9245 	else
9246 		return false;
9247 }
9248 
9249 /**
9250  * dce8_check_latency_hiding - check latency hiding
9251  *
9252  * @wm: watermark calculation data
9253  *
9254  * Check latency hiding (CIK).
9255  * Used for display watermark bandwidth calculations
9256  * Returns true if the display fits, false if not.
9257  */
9258 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9259 {
9260 	u32 lb_partitions = wm->lb_size / wm->src_width;
9261 	u32 line_time = wm->active_time + wm->blank_time;
9262 	u32 latency_tolerant_lines;
9263 	u32 latency_hiding;
9264 	fixed20_12 a;
9265 
9266 	a.full = dfixed_const(1);
9267 	if (wm->vsc.full > a.full)
9268 		latency_tolerant_lines = 1;
9269 	else {
9270 		if (lb_partitions <= (wm->vtaps + 1))
9271 			latency_tolerant_lines = 1;
9272 		else
9273 			latency_tolerant_lines = 2;
9274 	}
9275 
9276 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9277 
9278 	if (dce8_latency_watermark(wm) <= latency_hiding)
9279 		return true;
9280 	else
9281 		return false;
9282 }
9283 
9284 /**
9285  * dce8_program_watermarks - program display watermarks
9286  *
9287  * @rdev: radeon_device pointer
9288  * @radeon_crtc: the selected display controller
9289  * @lb_size: line buffer size
9290  * @num_heads: number of display controllers in use
9291  *
9292  * Calculate and program the display watermarks for the
9293  * selected display controller (CIK).
9294  */
9295 static void dce8_program_watermarks(struct radeon_device *rdev,
9296 				    struct radeon_crtc *radeon_crtc,
9297 				    u32 lb_size, u32 num_heads)
9298 {
9299 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9300 	struct dce8_wm_params wm_low, wm_high;
9301 	u32 pixel_period;
9302 	u32 line_time = 0;
9303 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9304 	u32 tmp, wm_mask;
9305 
9306 	if (radeon_crtc->base.enabled && num_heads && mode) {
9307 		pixel_period = 1000000 / (u32)mode->clock;
9308 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9309 
9310 		/* watermark for high clocks */
9311 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9312 		    rdev->pm.dpm_enabled) {
9313 			wm_high.yclk =
9314 				radeon_dpm_get_mclk(rdev, false) * 10;
9315 			wm_high.sclk =
9316 				radeon_dpm_get_sclk(rdev, false) * 10;
9317 		} else {
9318 			wm_high.yclk = rdev->pm.current_mclk * 10;
9319 			wm_high.sclk = rdev->pm.current_sclk * 10;
9320 		}
9321 
9322 		wm_high.disp_clk = mode->clock;
9323 		wm_high.src_width = mode->crtc_hdisplay;
9324 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9325 		wm_high.blank_time = line_time - wm_high.active_time;
9326 		wm_high.interlaced = false;
9327 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9328 			wm_high.interlaced = true;
9329 		wm_high.vsc = radeon_crtc->vsc;
9330 		wm_high.vtaps = 1;
9331 		if (radeon_crtc->rmx_type != RMX_OFF)
9332 			wm_high.vtaps = 2;
9333 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9334 		wm_high.lb_size = lb_size;
9335 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9336 		wm_high.num_heads = num_heads;
9337 
9338 		/* set for high clocks */
9339 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9340 
9341 		/* possibly force display priority to high */
9342 		/* should really do this at mode validation time... */
9343 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9344 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9345 		    !dce8_check_latency_hiding(&wm_high) ||
9346 		    (rdev->disp_priority == 2)) {
9347 			DRM_DEBUG_KMS("force priority to high\n");
9348 		}
9349 
9350 		/* watermark for low clocks */
9351 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9352 		    rdev->pm.dpm_enabled) {
9353 			wm_low.yclk =
9354 				radeon_dpm_get_mclk(rdev, true) * 10;
9355 			wm_low.sclk =
9356 				radeon_dpm_get_sclk(rdev, true) * 10;
9357 		} else {
9358 			wm_low.yclk = rdev->pm.current_mclk * 10;
9359 			wm_low.sclk = rdev->pm.current_sclk * 10;
9360 		}
9361 
9362 		wm_low.disp_clk = mode->clock;
9363 		wm_low.src_width = mode->crtc_hdisplay;
9364 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9365 		wm_low.blank_time = line_time - wm_low.active_time;
9366 		wm_low.interlaced = false;
9367 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9368 			wm_low.interlaced = true;
9369 		wm_low.vsc = radeon_crtc->vsc;
9370 		wm_low.vtaps = 1;
9371 		if (radeon_crtc->rmx_type != RMX_OFF)
9372 			wm_low.vtaps = 2;
9373 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9374 		wm_low.lb_size = lb_size;
9375 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9376 		wm_low.num_heads = num_heads;
9377 
9378 		/* set for low clocks */
9379 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9380 
9381 		/* possibly force display priority to high */
9382 		/* should really do this at mode validation time... */
9383 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9384 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9385 		    !dce8_check_latency_hiding(&wm_low) ||
9386 		    (rdev->disp_priority == 2)) {
9387 			DRM_DEBUG_KMS("force priority to high\n");
9388 		}
9389 	}
9390 
9391 	/* select wm A */
9392 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9393 	tmp = wm_mask;
9394 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9395 	tmp |= LATENCY_WATERMARK_MASK(1);
9396 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9397 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9398 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9399 		LATENCY_HIGH_WATERMARK(line_time)));
9400 	/* select wm B */
9401 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9402 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9403 	tmp |= LATENCY_WATERMARK_MASK(2);
9404 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9405 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9406 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9407 		LATENCY_HIGH_WATERMARK(line_time)));
9408 	/* restore original selection */
9409 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9410 
9411 	/* save values for DPM */
9412 	radeon_crtc->line_time = line_time;
9413 	radeon_crtc->wm_high = latency_watermark_a;
9414 	radeon_crtc->wm_low = latency_watermark_b;
9415 }
9416 
9417 /**
9418  * dce8_bandwidth_update - program display watermarks
9419  *
9420  * @rdev: radeon_device pointer
9421  *
9422  * Calculate and program the display watermarks and line
9423  * buffer allocation (CIK).
9424  */
9425 void dce8_bandwidth_update(struct radeon_device *rdev)
9426 {
9427 	struct drm_display_mode *mode = NULL;
9428 	u32 num_heads = 0, lb_size;
9429 	int i;
9430 
9431 	if (!rdev->mode_info.mode_config_initialized)
9432 		return;
9433 
9434 	radeon_update_display_priority(rdev);
9435 
9436 	for (i = 0; i < rdev->num_crtc; i++) {
9437 		if (rdev->mode_info.crtcs[i]->base.enabled)
9438 			num_heads++;
9439 	}
9440 	for (i = 0; i < rdev->num_crtc; i++) {
9441 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9442 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9443 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9444 	}
9445 }
9446 
9447 /**
9448  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9449  *
9450  * @rdev: radeon_device pointer
9451  *
9452  * Fetches a GPU clock counter snapshot (SI).
9453  * Returns the 64 bit clock counter snapshot.
9454  */
9455 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9456 {
9457 	uint64_t clock;
9458 
9459 	spin_lock(&rdev->gpu_clock_mutex);
9460 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9461 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9462 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9463 	spin_unlock(&rdev->gpu_clock_mutex);
9464 	return clock;
9465 }
9466 
9467 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9468                               u32 cntl_reg, u32 status_reg)
9469 {
9470 	int r, i;
9471 	struct atom_clock_dividers dividers;
9472 	uint32_t tmp;
9473 
9474 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9475 					   clock, false, &dividers);
9476 	if (r)
9477 		return r;
9478 
9479 	tmp = RREG32_SMC(cntl_reg);
9480 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9481 	tmp |= dividers.post_divider;
9482 	WREG32_SMC(cntl_reg, tmp);
9483 
9484 	for (i = 0; i < 100; i++) {
9485 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9486 			break;
9487 		mdelay(10);
9488 	}
9489 	if (i == 100)
9490 		return -ETIMEDOUT;
9491 
9492 	return 0;
9493 }
9494 
9495 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9496 {
9497 	int r = 0;
9498 
9499 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9500 	if (r)
9501 		return r;
9502 
9503 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9504 	return r;
9505 }
9506 
9507 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9508 {
9509 	int r, i;
9510 	struct atom_clock_dividers dividers;
9511 	u32 tmp;
9512 
9513 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9514 					   ecclk, false, &dividers);
9515 	if (r)
9516 		return r;
9517 
9518 	for (i = 0; i < 100; i++) {
9519 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9520 			break;
9521 		mdelay(10);
9522 	}
9523 	if (i == 100)
9524 		return -ETIMEDOUT;
9525 
9526 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9527 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9528 	tmp |= dividers.post_divider;
9529 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9530 
9531 	for (i = 0; i < 100; i++) {
9532 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9533 			break;
9534 		mdelay(10);
9535 	}
9536 	if (i == 100)
9537 		return -ETIMEDOUT;
9538 
9539 	return 0;
9540 }
9541 
/**
 * cik_pcie_gen3_enable - attempt to raise the PCIe link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * Checks what link speeds the root port supports and, if the link is not
 * already running at the highest supported rate, retrains it.  For gen3 a
 * full equalization retry sequence is performed first (per the CIK
 * programming sequence).  Silently returns on IGPs, non-PCIE parts, when
 * disabled via radeon.pcie_gen2=0, or when capability lookups fail.
 * NOTE(review): the exact ordering of config-space and PCIE-port register
 * accesses below follows the hardware bring-up sequence — do not reorder.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* Module parameter radeon.pcie_gen2=0 disables all speed changes. */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* Query the speeds the upstream port can actually do. */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* Nothing to do if only gen1 is available. */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* Locate the PCIe capability blocks of the bridge and the GPU. */
	bridge_pos = pci_get_pciecap_ptr(root->dev);
	if (!bridge_pos)
		return;

	gpu_pos = pci_get_pciecap_ptr(rdev->pdev->dev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* Save both ends' link-control state before training. */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			/* Force hardware autonomous width disable on both ends. */
			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* If the link trained narrower than detected, renegotiate width. */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* Up to 10 equalization retries. */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* Snapshot LNKCTL/LNKCTL2 on both ends so they can
				 * be restored after the retraining below. */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* Quiesce the link, then request equalization redo. */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				/* Restore only the HAWD bit from the saved configs. */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* Restore the compliance-related LNKCTL2 bits
				 * (bit 4 and bits 9-11) from the saved configs. */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* Release the quiesce for the next iteration. */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* Set the GPU's target link speed in LNKCTL2 (low 4 bits). */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* Kick off the speed change. */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* Wait for the hardware to clear the initiate bit (change complete). */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9698 
/**
 * cik_program_aspm - configure PCIe Active State Power Management
 *
 * @rdev: radeon_device pointer
 *
 * Sets up L0s/L1 link power states and the associated PLL power-down
 * behavior on CIK parts.  The local disable_* flags are compile-time
 * policy knobs (all currently false, i.e. L0s, L1 and PLL-off-in-L1
 * are all enabled).  Skipped entirely on IGPs, non-PCIE parts, or when
 * disabled via the radeon.aspm module parameter.  Every register write
 * is guarded by an orig != data check to avoid redundant bus traffic.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* Override the N_FTS (fast training sequence) count to 0x24. */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* Build the L0s/L1 inactivity configuration. */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		/* L1 enabled: set its inactivity timer and allow PMI-to-L1. */
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* Allow both PIF pad blocks to power their PLLs down
			 * in the OFF and TXS2 states (value 7 in each field). */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				/* The CLKREQ capability check is not ported to
				 * DragonFly yet (zMN_TODO), so clk_req_support is
				 * forced false and the block below never runs. */
#ifdef zMN_TODO
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
#else
				clk_req_support = false;
#endif
			} else {
				clk_req_support = false;
			}

			/* Dead code today (clk_req_support is always false here);
			 * kept to match the upstream Linux driver sequence. */
			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: write the L0s-only configuration built above. */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* Enable memory light-sleep for the BIF slave/master/replay memories. */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* If N_FTS saturated and the link is reversed in both
		 * directions, L0s cannot work — clear its inactivity timer. */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
9850