xref: /dragonfly/sys/dev/drm/radeon/cik.c (revision ef2687d4)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include "cikd.h"
30 #include "atom.h"
31 #include "cik_blit_shaders.h"
32 #include "radeon_ucode.h"
33 #include "clearstate_ci.h"
34 
35 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
36 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
45 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
46 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
47 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
48 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
49 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
50 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
51 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
52 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
53 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
59 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
60 MODULE_FIRMWARE("radeon/KABINI_me.bin");
61 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
62 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
63 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
64 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
65 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
66 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
67 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
68 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
69 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
70 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
71 
72 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
73 static void cik_rlc_stop(struct radeon_device *rdev);
74 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
75 static void cik_program_aspm(struct radeon_device *rdev);
76 static void cik_init_pg(struct radeon_device *rdev);
77 static void cik_init_cg(struct radeon_device *rdev);
78 static void cik_fini_pg(struct radeon_device *rdev);
79 static void cik_fini_cg(struct radeon_device *rdev);
80 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
81 					  bool enable);
82 
83 /* get temperature in millidegrees */
84 int ci_get_temp(struct radeon_device *rdev)
85 {
86 	u32 temp;
87 	int actual_temp = 0;
88 
89 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
90 		CTF_TEMP_SHIFT;
91 
92 	if (temp & 0x200)
93 		actual_temp = 255;
94 	else
95 		actual_temp = temp & 0x1ff;
96 
97 	actual_temp = actual_temp * 1000;
98 
99 	return actual_temp;
100 }
101 
102 /* get temperature in millidegrees */
103 int kv_get_temp(struct radeon_device *rdev)
104 {
105 	u32 temp;
106 	int actual_temp = 0;
107 
108 	temp = RREG32_SMC(0xC0300E0C);
109 
110 	if (temp)
111 		actual_temp = (temp / 8) - 49;
112 	else
113 		actual_temp = 0;
114 
115 	actual_temp = actual_temp * 1000;
116 
117 	return actual_temp;
118 }
119 
120 /*
121  * Indirect registers accessor
122  */
/**
 * cik_pciep_rreg - read a PCIE port indirect register
 * @rdev: radeon_device pointer
 * @reg: PCIE port register offset
 *
 * Accesses the PCIE port register space through the PCIE_INDEX/PCIE_DATA
 * index-data pair. The pciep_idx_lock serializes concurrent users of the
 * shared index register; the statement order inside the lock is significant.
 *
 * Returns the register value.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	spin_lock(&rdev->pciep_idx_lock);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* read back to post the index write */
	r = RREG32(PCIE_DATA);
	spin_unlock(&rdev->pciep_idx_lock);
	return r;
}
134 
/**
 * cik_pciep_wreg - write a PCIE port indirect register
 * @rdev: radeon_device pointer
 * @reg: PCIE port register offset
 * @v: value to write
 *
 * Counterpart to cik_pciep_rreg(): selects @reg via PCIE_INDEX, writes @v
 * through PCIE_DATA, and reads each register back to flush the posted
 * writes before dropping the lock. Statement order is significant.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	spin_lock(&rdev->pciep_idx_lock);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* post the index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* post the data write */
	spin_unlock(&rdev->pciep_idx_lock);
}
144 
/*
 * RLC save/restore register list for Spectre (Kaveri GFX).
 * Entries come in pairs: ((select << 16) | (register offset >> 2)) followed
 * by a 0x00000000 slot. NOTE(review): the high 16 bits look like a
 * SE/SH/instance broadcast select (0x0e00 broadcast, 0x4e00..0xbe00
 * per-instance) and the bare 0x3 / 0x5 words look like sub-list markers
 * consumed by the RLC ucode -- confirm against the RLC programming docs.
 * Do not reorder or edit values; they must match the firmware's expectations.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* NOTE(review): presumably a sub-list marker for the RLC ucode */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* NOTE(review): presumably a sub-list marker for the RLC ucode */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
591 
/*
 * RLC save/restore register list for Kalindi (Kabini/Mullins GFX).
 * Same encoding as spectre_rlc_save_restore_register_list: pairs of
 * ((select << 16) | (register offset >> 2)) and a 0x00000000 slot, with
 * bare 0x3 / 0x5 words delimiting sub-lists. NOTE(review): the high
 * 16 bits look like a SE/SH/instance select -- confirm against the RLC
 * programming docs. Do not reorder or edit values.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* NOTE(review): presumably a sub-list marker for the RLC ucode */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* NOTE(review): presumably a sub-list marker for the RLC ucode */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
916 
/*
 * Bonaire "golden" SPM register settings.
 * Triplets of {offset, mask, value} -- NOTE(review): presumably applied by
 * radeon_program_register_sequence() as (reg & ~mask) | value; confirm.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
921 
/*
 * Bonaire "golden" common register settings.
 * Triplets of {offset, mask, value} -- NOTE(review): presumably applied by
 * radeon_program_register_sequence(); confirm.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
929 
/*
 * Bonaire "golden" register settings.
 * Triplets of {offset, mask, value} -- NOTE(review): presumably applied by
 * radeon_program_register_sequence(); values come from AMD and must not be
 * edited or reordered.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
974 
/*
 * Bonaire MGCG/CGCG (clock gating) init settings.
 * Triplets of {offset, mask, value} -- NOTE(review): presumably applied by
 * radeon_program_register_sequence() during clock-gating setup; values come
 * from AMD and must not be edited or reordered.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1060 
/*
 * Spectre (Kaveri) "golden" SPM register settings.
 * Triplets of {offset, mask, value} -- NOTE(review): presumably applied by
 * radeon_program_register_sequence(); confirm.
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1065 
/*
 * Spectre (Kaveri) "golden" common register settings.
 * Triplets of {offset, mask, value}; identical to the Bonaire common list --
 * NOTE(review): presumably applied by radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1073 
/*
 * Spectre (Kaveri) "golden" register settings.
 * Triplets of {offset, mask, value} -- NOTE(review): presumably applied by
 * radeon_program_register_sequence(); values come from AMD and must not be
 * edited or reordered.
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1102 
/* Clock/power gating (MGCG/CGCG) init sequence for KV "Spectre" (Kaveri);
 * programmed first for CHIP_KAVERI in cik_init_golden_registers().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1193 
/* SPM golden settings for KB "Kalindi" (Kabini); also reused for
 * CHIP_MULLINS in cik_init_golden_registers().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1198 
/* Common golden settings for KB "Kalindi" (Kabini); also reused for
 * CHIP_MULLINS in cik_init_golden_registers().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1206 
/* Golden register settings for KB "Kalindi" (Kabini); programmed for
 * CHIP_KABINI in cik_init_golden_registers() (Mullins uses its own
 * godavari_golden_registers table instead).
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1240 
/* Clock/power gating (MGCG/CGCG) init sequence for KB "Kalindi"; programmed
 * first for both CHIP_KABINI and CHIP_MULLINS in cik_init_golden_registers().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1299 
/* SPM golden settings for Hawaii; consumed by cik_init_golden_registers()
 * for CHIP_HAWAII.
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1304 
/* Common golden settings for Hawaii; consumed by cik_init_golden_registers()
 * for CHIP_HAWAII.
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1313 
/* Golden register settings for Hawaii; consumed by
 * cik_init_golden_registers() for CHIP_HAWAII.
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1353 
/* Clock/power gating (MGCG/CGCG) init sequence for Hawaii; programmed first
 * for CHIP_HAWAII in cik_init_golden_registers().
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1464 
/* Golden register settings for ML "Godavari" (Mullins); programmed for
 * CHIP_MULLINS in cik_init_golden_registers().
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 is not dword-aligned and the sibling kalindi
	 * table uses 0x9834 with the same mask/value — possible typo; confirm
	 * against upstream before changing. */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1500 
1501 
1502 static void cik_init_golden_registers(struct radeon_device *rdev)
1503 {
1504 	switch (rdev->family) {
1505 	case CHIP_BONAIRE:
1506 		radeon_program_register_sequence(rdev,
1507 						 bonaire_mgcg_cgcg_init,
1508 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1509 		radeon_program_register_sequence(rdev,
1510 						 bonaire_golden_registers,
1511 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1512 		radeon_program_register_sequence(rdev,
1513 						 bonaire_golden_common_registers,
1514 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1515 		radeon_program_register_sequence(rdev,
1516 						 bonaire_golden_spm_registers,
1517 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1518 		break;
1519 	case CHIP_KABINI:
1520 		radeon_program_register_sequence(rdev,
1521 						 kalindi_mgcg_cgcg_init,
1522 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1523 		radeon_program_register_sequence(rdev,
1524 						 kalindi_golden_registers,
1525 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1526 		radeon_program_register_sequence(rdev,
1527 						 kalindi_golden_common_registers,
1528 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1529 		radeon_program_register_sequence(rdev,
1530 						 kalindi_golden_spm_registers,
1531 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1532 		break;
1533 	case CHIP_MULLINS:
1534 		radeon_program_register_sequence(rdev,
1535 						 kalindi_mgcg_cgcg_init,
1536 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1537 		radeon_program_register_sequence(rdev,
1538 						 godavari_golden_registers,
1539 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1540 		radeon_program_register_sequence(rdev,
1541 						 kalindi_golden_common_registers,
1542 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1543 		radeon_program_register_sequence(rdev,
1544 						 kalindi_golden_spm_registers,
1545 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1546 		break;
1547 	case CHIP_KAVERI:
1548 		radeon_program_register_sequence(rdev,
1549 						 spectre_mgcg_cgcg_init,
1550 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1551 		radeon_program_register_sequence(rdev,
1552 						 spectre_golden_registers,
1553 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1554 		radeon_program_register_sequence(rdev,
1555 						 spectre_golden_common_registers,
1556 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1557 		radeon_program_register_sequence(rdev,
1558 						 spectre_golden_spm_registers,
1559 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1560 		break;
1561 	case CHIP_HAWAII:
1562 		radeon_program_register_sequence(rdev,
1563 						 hawaii_mgcg_cgcg_init,
1564 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1565 		radeon_program_register_sequence(rdev,
1566 						 hawaii_golden_registers,
1567 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1568 		radeon_program_register_sequence(rdev,
1569 						 hawaii_golden_common_registers,
1570 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1571 		radeon_program_register_sequence(rdev,
1572 						 hawaii_golden_spm_registers,
1573 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1574 		break;
1575 	default:
1576 		break;
1577 	}
1578 }
1579 
1580 /**
1581  * cik_get_xclk - get the xclk
1582  *
1583  * @rdev: radeon_device pointer
1584  *
1585  * Returns the reference clock used by the gfx engine
1586  * (CIK).
1587  */
1588 u32 cik_get_xclk(struct radeon_device *rdev)
1589 {
1590         u32 reference_clock = rdev->clock.spll.reference_freq;
1591 
1592 	if (rdev->flags & RADEON_IS_IGP) {
1593 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1594 			return reference_clock / 2;
1595 	} else {
1596 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1597 			return reference_clock / 4;
1598 	}
1599 	return reference_clock;
1600 }
1601 
1602 /**
1603  * cik_mm_rdoorbell - read a doorbell dword
1604  *
1605  * @rdev: radeon_device pointer
1606  * @index: doorbell index
1607  *
1608  * Returns the value in the doorbell aperture at the
1609  * requested doorbell index (CIK).
1610  */
1611 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1612 {
1613 	if (index < rdev->doorbell.num_doorbells) {
1614 		return readl(rdev->doorbell.ptr + index);
1615 	} else {
1616 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1617 		return 0;
1618 	}
1619 }
1620 
1621 /**
1622  * cik_mm_wdoorbell - write a doorbell dword
1623  *
1624  * @rdev: radeon_device pointer
1625  * @index: doorbell index
1626  * @v: value to write
1627  *
1628  * Writes @v to the doorbell aperture at the
1629  * requested doorbell index (CIK).
1630  */
1631 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1632 {
1633 	if (index < rdev->doorbell.num_doorbells) {
1634 		writel(v, rdev->doorbell.ptr + index);
1635 	} else {
1636 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1637 	}
1638 }
1639 
#define BONAIRE_IO_MC_REGS_SIZE 36

/* MC sequencer IO tuning values for Bonaire: each row is an
 * {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pair written by
 * ci_mc_load_microcode() before uploading the MC ucode.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1681 
#define HAWAII_IO_MC_REGS_SIZE 22

/* MC sequencer IO tuning values for Hawaii: each row is an
 * {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pair written by
 * ci_mc_load_microcode() before uploading the MC ucode.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1709 
1710 
1711 /**
1712  * cik_srbm_select - select specific register instances
1713  *
1714  * @rdev: radeon_device pointer
1715  * @me: selected ME (micro engine)
1716  * @pipe: pipe
1717  * @queue: queue
1718  * @vmid: VMID
1719  *
1720  * Switches the currently active registers instances.  Some
1721  * registers are instanced per VMID, others are instanced per
1722  * me/pipe/queue combination.
1723  */
1724 static void cik_srbm_select(struct radeon_device *rdev,
1725 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1726 {
1727 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1728 			     MEID(me & 0x3) |
1729 			     VMID(vmid & 0xf) |
1730 			     QUEUEID(queue & 0x7));
1731 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1732 }
1733 
1734 /* ucode loading */
1735 /**
1736  * ci_mc_load_microcode - load MC ucode into the hw
1737  *
1738  * @rdev: radeon_device pointer
1739  *
1740  * Load the GDDR MC ucode into the hw (CIK).
1741  * Returns 0 on success, error on failure.
1742  */
1743 int ci_mc_load_microcode(struct radeon_device *rdev)
1744 {
1745 	const __be32 *fw_data;
1746 	u32 running, blackout = 0;
1747 	u32 *io_mc_regs;
1748 	int i, regs_size, ucode_size;
1749 
1750 	if (!rdev->mc_fw)
1751 		return -EINVAL;
1752 
1753 	ucode_size = rdev->mc_fw->datasize / 4;
1754 
1755 	switch (rdev->family) {
1756 	case CHIP_BONAIRE:
1757 		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1758 		regs_size = BONAIRE_IO_MC_REGS_SIZE;
1759 		break;
1760 	case CHIP_HAWAII:
1761 		io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1762 		regs_size = HAWAII_IO_MC_REGS_SIZE;
1763 		break;
1764 	default:
1765 		return -EINVAL;
1766 	}
1767 
1768 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1769 
1770 	if (running == 0) {
1771 		if (running) {
1772 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1773 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1774 		}
1775 
1776 		/* reset the engine and set to writable */
1777 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1778 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1779 
1780 		/* load mc io regs */
1781 		for (i = 0; i < regs_size; i++) {
1782 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1783 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1784 		}
1785 		/* load the MC ucode */
1786 		fw_data = (const __be32 *)rdev->mc_fw->data;
1787 		for (i = 0; i < ucode_size; i++)
1788 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1789 
1790 		/* put the engine back into the active state */
1791 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1792 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1793 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1794 
1795 		/* wait for training to complete */
1796 		for (i = 0; i < rdev->usec_timeout; i++) {
1797 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1798 				break;
1799 			udelay(1);
1800 		}
1801 		for (i = 0; i < rdev->usec_timeout; i++) {
1802 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1803 				break;
1804 			udelay(1);
1805 		}
1806 
1807 		if (running)
1808 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1809 	}
1810 
1811 	return 0;
1812 }
1813 
1814 /**
1815  * cik_init_microcode - load ucode images from disk
1816  *
1817  * @rdev: radeon_device pointer
1818  *
1819  * Use the firmware interface to load the ucode images into
1820  * the driver (not loaded into hw).
1821  * Returns 0 on success, error on failure.
1822  */
1823 static int cik_init_microcode(struct radeon_device *rdev)
1824 {
1825 	const char *chip_name;
1826 	size_t pfp_req_size, me_req_size, ce_req_size,
1827 		mec_req_size, rlc_req_size, mc_req_size = 0,
1828 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1829 	char fw_name[30];
1830 	int err;
1831 
1832 	DRM_DEBUG("\n");
1833 
1834 	switch (rdev->family) {
1835 	case CHIP_BONAIRE:
1836 		chip_name = "BONAIRE";
1837 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1838 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1839 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1840 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1841 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1842 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1843 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1844 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1845 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1846 		break;
1847 	case CHIP_HAWAII:
1848 		chip_name = "HAWAII";
1849 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1850 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1851 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1852 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1853 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1854 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1855 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1856 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1857 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1858 		break;
1859 	case CHIP_KAVERI:
1860 		chip_name = "KAVERI";
1861 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1862 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1863 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1864 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1865 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1866 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1867 		break;
1868 	case CHIP_KABINI:
1869 		chip_name = "KABINI";
1870 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1871 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1872 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1873 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1874 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1875 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1876 		break;
1877 	case CHIP_MULLINS:
1878 		chip_name = "MULLINS";
1879 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1880 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1881 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1882 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1883 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1884 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1885 		break;
1886 	default: BUG();
1887 	}
1888 
1889 	DRM_INFO("Loading %s Microcode\n", chip_name);
1890 
1891 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1892 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1893 	if (err)
1894 		goto out;
1895 	if (rdev->pfp_fw->datasize != pfp_req_size) {
1896 		printk(KERN_ERR
1897 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1898 		       rdev->pfp_fw->datasize, fw_name);
1899 		err = -EINVAL;
1900 		goto out;
1901 	}
1902 
1903 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
1904 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1905 	if (err)
1906 		goto out;
1907 	if (rdev->me_fw->datasize != me_req_size) {
1908 		printk(KERN_ERR
1909 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1910 		       rdev->me_fw->datasize, fw_name);
1911 		err = -EINVAL;
1912 	}
1913 
1914 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
1915 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1916 	if (err)
1917 		goto out;
1918 	if (rdev->ce_fw->datasize != ce_req_size) {
1919 		printk(KERN_ERR
1920 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1921 		       rdev->ce_fw->datasize, fw_name);
1922 		err = -EINVAL;
1923 	}
1924 
1925 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
1926 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1927 	if (err)
1928 		goto out;
1929 	if (rdev->mec_fw->datasize != mec_req_size) {
1930 		printk(KERN_ERR
1931 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1932 		       rdev->mec_fw->datasize, fw_name);
1933 		err = -EINVAL;
1934 	}
1935 
1936 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
1937 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1938 	if (err)
1939 		goto out;
1940 	if (rdev->rlc_fw->datasize != rlc_req_size) {
1941 		printk(KERN_ERR
1942 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1943 		       rdev->rlc_fw->datasize, fw_name);
1944 		err = -EINVAL;
1945 	}
1946 
1947 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
1948 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1949 	if (err)
1950 		goto out;
1951 	if (rdev->sdma_fw->datasize != sdma_req_size) {
1952 		printk(KERN_ERR
1953 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1954 		       rdev->sdma_fw->datasize, fw_name);
1955 		err = -EINVAL;
1956 	}
1957 
1958 	/* No SMC, MC ucode on APUs */
1959 	if (!(rdev->flags & RADEON_IS_IGP)) {
1960 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
1961 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1962 		if (err) {
1963 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
1964 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1965 			if (err)
1966 				goto out;
1967 		}
1968 		if ((rdev->mc_fw->datasize != mc_req_size) &&
1969 		    (rdev->mc_fw->datasize != mc2_req_size)){
1970 			printk(KERN_ERR
1971 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1972 			       rdev->mc_fw->datasize, fw_name);
1973 			err = -EINVAL;
1974 		}
1975 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
1976 
1977 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
1978 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1979 		if (err) {
1980 			printk(KERN_ERR
1981 			       "smc: error loading firmware \"%s\"\n",
1982 			       fw_name);
1983 			release_firmware(rdev->smc_fw);
1984 			rdev->smc_fw = NULL;
1985 			err = 0;
1986 		} else if (rdev->smc_fw->datasize != smc_req_size) {
1987 			printk(KERN_ERR
1988 			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1989 			       rdev->smc_fw->datasize, fw_name);
1990 			err = -EINVAL;
1991 		}
1992 	}
1993 
1994 out:
1995 	if (err) {
1996 		if (err != -EINVAL)
1997 			printk(KERN_ERR
1998 			       "cik_cp: Failed to load firmware \"%s\"\n",
1999 			       fw_name);
2000 		release_firmware(rdev->pfp_fw);
2001 		rdev->pfp_fw = NULL;
2002 		release_firmware(rdev->me_fw);
2003 		rdev->me_fw = NULL;
2004 		release_firmware(rdev->ce_fw);
2005 		rdev->ce_fw = NULL;
2006 		release_firmware(rdev->mec_fw);
2007 		rdev->mec_fw = NULL;
2008 		release_firmware(rdev->rlc_fw);
2009 		rdev->rlc_fw = NULL;
2010 		release_firmware(rdev->sdma_fw);
2011 		rdev->sdma_fw = NULL;
2012 		release_firmware(rdev->mc_fw);
2013 		rdev->mc_fw = NULL;
2014 		release_firmware(rdev->smc_fw);
2015 		rdev->smc_fw = NULL;
2016 	}
2017 	return err;
2018 }
2019 
2020 /*
2021  * Core functions
2022  */
2023 /**
2024  * cik_tiling_mode_table_init - init the hw tiling table
2025  *
2026  * @rdev: radeon_device pointer
2027  *
2028  * Starting with SI, the tiling setup is done globally in a
2029  * set of 32 tiling modes.  Rather than selecting each set of
2030  * parameters per surface as on older asics, we just select
2031  * which index in the tiling table we want to use, and the
2032  * surface uses those parameters (CIK).
2033  */
2034 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2035 {
2036 	const u32 num_tile_mode_states = 32;
2037 	const u32 num_secondary_tile_mode_states = 16;
2038 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2039 	u32 num_pipe_configs;
2040 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2041 		rdev->config.cik.max_shader_engines;
2042 
2043 	switch (rdev->config.cik.mem_row_size_in_kb) {
2044 	case 1:
2045 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2046 		break;
2047 	case 2:
2048 	default:
2049 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2050 		break;
2051 	case 4:
2052 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2053 		break;
2054 	}
2055 
2056 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2057 	if (num_pipe_configs > 8)
2058 		num_pipe_configs = 16;
2059 
2060 	if (num_pipe_configs == 16) {
2061 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2062 			switch (reg_offset) {
2063 			case 0:
2064 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2065 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2066 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2067 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2068 				break;
2069 			case 1:
2070 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2071 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2072 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2073 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2074 				break;
2075 			case 2:
2076 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2077 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2078 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2079 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2080 				break;
2081 			case 3:
2082 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2083 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2084 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2085 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2086 				break;
2087 			case 4:
2088 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2089 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2090 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2091 						 TILE_SPLIT(split_equal_to_row_size));
2092 				break;
2093 			case 5:
2094 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2095 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2096 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2097 				break;
2098 			case 6:
2099 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2100 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2101 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2102 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2103 				break;
2104 			case 7:
2105 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2106 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2107 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2108 						 TILE_SPLIT(split_equal_to_row_size));
2109 				break;
2110 			case 8:
2111 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2112 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2113 				break;
2114 			case 9:
2115 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2116 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2117 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2118 				break;
2119 			case 10:
2120 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2121 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2122 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2123 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2124 				break;
2125 			case 11:
2126 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2127 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2128 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2129 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2130 				break;
2131 			case 12:
2132 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2133 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2134 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2135 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2136 				break;
2137 			case 13:
2138 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2139 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2140 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2141 				break;
2142 			case 14:
2143 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2144 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2145 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2146 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2147 				break;
2148 			case 16:
2149 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2150 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2151 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2152 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2153 				break;
2154 			case 17:
2155 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2156 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2157 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2158 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2159 				break;
2160 			case 27:
2161 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2162 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2163 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2164 				break;
2165 			case 28:
2166 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2167 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2168 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2169 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2170 				break;
2171 			case 29:
2172 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2173 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2174 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2175 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2176 				break;
2177 			case 30:
2178 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2179 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2180 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2181 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2182 				break;
2183 			default:
2184 				gb_tile_moden = 0;
2185 				break;
2186 			}
2187 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2188 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2189 		}
2190 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2191 			switch (reg_offset) {
2192 			case 0:
2193 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2194 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2195 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2196 						 NUM_BANKS(ADDR_SURF_16_BANK));
2197 				break;
2198 			case 1:
2199 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2200 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2201 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2202 						 NUM_BANKS(ADDR_SURF_16_BANK));
2203 				break;
2204 			case 2:
2205 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2206 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2207 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2208 						 NUM_BANKS(ADDR_SURF_16_BANK));
2209 				break;
2210 			case 3:
2211 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2212 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2213 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2214 						 NUM_BANKS(ADDR_SURF_16_BANK));
2215 				break;
2216 			case 4:
2217 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2218 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2219 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2220 						 NUM_BANKS(ADDR_SURF_8_BANK));
2221 				break;
2222 			case 5:
2223 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2224 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2225 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2226 						 NUM_BANKS(ADDR_SURF_4_BANK));
2227 				break;
2228 			case 6:
2229 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2230 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2231 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2232 						 NUM_BANKS(ADDR_SURF_2_BANK));
2233 				break;
2234 			case 8:
2235 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2236 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2237 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2238 						 NUM_BANKS(ADDR_SURF_16_BANK));
2239 				break;
2240 			case 9:
2241 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2242 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2243 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2244 						 NUM_BANKS(ADDR_SURF_16_BANK));
2245 				break;
2246 			case 10:
2247 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2248 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2249 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2250 						 NUM_BANKS(ADDR_SURF_16_BANK));
2251 				break;
2252 			case 11:
2253 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2254 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2255 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2256 						 NUM_BANKS(ADDR_SURF_8_BANK));
2257 				break;
2258 			case 12:
2259 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2260 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2261 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2262 						 NUM_BANKS(ADDR_SURF_4_BANK));
2263 				break;
2264 			case 13:
2265 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2267 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2268 						 NUM_BANKS(ADDR_SURF_2_BANK));
2269 				break;
2270 			case 14:
2271 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2273 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2274 						 NUM_BANKS(ADDR_SURF_2_BANK));
2275 				break;
2276 			default:
2277 				gb_tile_moden = 0;
2278 				break;
2279 			}
2280 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2281 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2282 		}
2283 	} else if (num_pipe_configs == 8) {
2284 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2285 			switch (reg_offset) {
2286 			case 0:
2287 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2288 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2289 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2290 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2291 				break;
2292 			case 1:
2293 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2294 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2295 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2296 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2297 				break;
2298 			case 2:
2299 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2300 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2301 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2302 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2303 				break;
2304 			case 3:
2305 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2306 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2307 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2308 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2309 				break;
2310 			case 4:
2311 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2312 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2313 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2314 						 TILE_SPLIT(split_equal_to_row_size));
2315 				break;
2316 			case 5:
2317 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2318 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2319 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2320 				break;
2321 			case 6:
2322 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2323 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2324 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2325 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2326 				break;
2327 			case 7:
2328 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2329 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2330 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2331 						 TILE_SPLIT(split_equal_to_row_size));
2332 				break;
2333 			case 8:
2334 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2335 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2336 				break;
2337 			case 9:
2338 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2339 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2340 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2341 				break;
2342 			case 10:
2343 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2344 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2345 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2346 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2347 				break;
2348 			case 11:
2349 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2350 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2351 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2352 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2353 				break;
2354 			case 12:
2355 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2356 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2357 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2358 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359 				break;
2360 			case 13:
2361 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2362 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2363 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2364 				break;
2365 			case 14:
2366 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2367 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2368 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2369 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2370 				break;
2371 			case 16:
2372 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2373 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2374 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2375 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2376 				break;
2377 			case 17:
2378 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2379 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2381 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2382 				break;
2383 			case 27:
2384 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2385 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2386 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2387 				break;
2388 			case 28:
2389 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2391 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2392 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2393 				break;
2394 			case 29:
2395 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2396 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2397 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2398 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2399 				break;
2400 			case 30:
2401 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2402 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2403 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2404 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405 				break;
2406 			default:
2407 				gb_tile_moden = 0;
2408 				break;
2409 			}
2410 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2411 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2412 		}
2413 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2414 			switch (reg_offset) {
2415 			case 0:
2416 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2417 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2418 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2419 						 NUM_BANKS(ADDR_SURF_16_BANK));
2420 				break;
2421 			case 1:
2422 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2423 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2424 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2425 						 NUM_BANKS(ADDR_SURF_16_BANK));
2426 				break;
2427 			case 2:
2428 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2429 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2430 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2431 						 NUM_BANKS(ADDR_SURF_16_BANK));
2432 				break;
2433 			case 3:
2434 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2436 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2437 						 NUM_BANKS(ADDR_SURF_16_BANK));
2438 				break;
2439 			case 4:
2440 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2442 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2443 						 NUM_BANKS(ADDR_SURF_8_BANK));
2444 				break;
2445 			case 5:
2446 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2448 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2449 						 NUM_BANKS(ADDR_SURF_4_BANK));
2450 				break;
2451 			case 6:
2452 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2455 						 NUM_BANKS(ADDR_SURF_2_BANK));
2456 				break;
2457 			case 8:
2458 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2460 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2461 						 NUM_BANKS(ADDR_SURF_16_BANK));
2462 				break;
2463 			case 9:
2464 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2467 						 NUM_BANKS(ADDR_SURF_16_BANK));
2468 				break;
2469 			case 10:
2470 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2472 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2473 						 NUM_BANKS(ADDR_SURF_16_BANK));
2474 				break;
2475 			case 11:
2476 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2479 						 NUM_BANKS(ADDR_SURF_16_BANK));
2480 				break;
2481 			case 12:
2482 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2484 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2485 						 NUM_BANKS(ADDR_SURF_8_BANK));
2486 				break;
2487 			case 13:
2488 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491 						 NUM_BANKS(ADDR_SURF_4_BANK));
2492 				break;
2493 			case 14:
2494 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2497 						 NUM_BANKS(ADDR_SURF_2_BANK));
2498 				break;
2499 			default:
2500 				gb_tile_moden = 0;
2501 				break;
2502 			}
2503 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2504 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2505 		}
2506 	} else if (num_pipe_configs == 4) {
2507 		if (num_rbs == 4) {
2508 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2509 				switch (reg_offset) {
2510 				case 0:
2511 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2513 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2514 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2515 					break;
2516 				case 1:
2517 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2519 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2520 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2521 					break;
2522 				case 2:
2523 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2525 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2526 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2527 					break;
2528 				case 3:
2529 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2531 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2532 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2533 					break;
2534 				case 4:
2535 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2536 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2537 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2538 							 TILE_SPLIT(split_equal_to_row_size));
2539 					break;
2540 				case 5:
2541 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2542 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2543 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2544 					break;
2545 				case 6:
2546 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2547 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2548 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2549 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2550 					break;
2551 				case 7:
2552 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2553 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2554 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2555 							 TILE_SPLIT(split_equal_to_row_size));
2556 					break;
2557 				case 8:
2558 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2559 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2560 					break;
2561 				case 9:
2562 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2563 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2564 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2565 					break;
2566 				case 10:
2567 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2568 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2569 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2570 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2571 					break;
2572 				case 11:
2573 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2574 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2575 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2576 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2577 					break;
2578 				case 12:
2579 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2580 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2581 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2582 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583 					break;
2584 				case 13:
2585 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2586 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2587 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2588 					break;
2589 				case 14:
2590 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2592 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2593 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2594 					break;
2595 				case 16:
2596 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2597 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2598 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2599 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2600 					break;
2601 				case 17:
2602 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2603 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2604 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2605 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606 					break;
2607 				case 27:
2608 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2609 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2610 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2611 					break;
2612 				case 28:
2613 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2614 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2615 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2616 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2617 					break;
2618 				case 29:
2619 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2620 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2621 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2622 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2623 					break;
2624 				case 30:
2625 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2626 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2627 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2628 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2629 					break;
2630 				default:
2631 					gb_tile_moden = 0;
2632 					break;
2633 				}
2634 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2635 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2636 			}
2637 		} else if (num_rbs < 4) {
2638 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2639 				switch (reg_offset) {
2640 				case 0:
2641 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2643 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2644 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2645 					break;
2646 				case 1:
2647 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2648 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2649 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2650 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2651 					break;
2652 				case 2:
2653 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2655 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2656 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2657 					break;
2658 				case 3:
2659 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2661 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2662 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2663 					break;
2664 				case 4:
2665 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2668 							 TILE_SPLIT(split_equal_to_row_size));
2669 					break;
2670 				case 5:
2671 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2672 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2673 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2674 					break;
2675 				case 6:
2676 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2677 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2679 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2680 					break;
2681 				case 7:
2682 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2683 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2684 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2685 							 TILE_SPLIT(split_equal_to_row_size));
2686 					break;
2687 				case 8:
2688 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2689 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2690 					break;
2691 				case 9:
2692 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2694 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2695 					break;
2696 				case 10:
2697 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2698 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2700 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701 					break;
2702 				case 11:
2703 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2704 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2705 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2706 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707 					break;
2708 				case 12:
2709 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2710 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2711 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2712 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2713 					break;
2714 				case 13:
2715 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2716 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2717 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2718 					break;
2719 				case 14:
2720 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2721 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2722 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2723 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2724 					break;
2725 				case 16:
2726 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2728 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2729 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2730 					break;
2731 				case 17:
2732 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2733 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2735 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2736 					break;
2737 				case 27:
2738 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2739 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2741 					break;
2742 				case 28:
2743 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2744 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2745 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2746 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2747 					break;
2748 				case 29:
2749 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2750 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2751 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2753 					break;
2754 				case 30:
2755 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2756 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2757 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2758 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2759 					break;
2760 				default:
2761 					gb_tile_moden = 0;
2762 					break;
2763 				}
2764 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2765 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2766 			}
2767 		}
2768 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2769 			switch (reg_offset) {
2770 			case 0:
2771 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2773 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2774 						 NUM_BANKS(ADDR_SURF_16_BANK));
2775 				break;
2776 			case 1:
2777 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2778 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2779 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2780 						 NUM_BANKS(ADDR_SURF_16_BANK));
2781 				break;
2782 			case 2:
2783 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2784 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2785 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2786 						 NUM_BANKS(ADDR_SURF_16_BANK));
2787 				break;
2788 			case 3:
2789 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2791 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2792 						 NUM_BANKS(ADDR_SURF_16_BANK));
2793 				break;
2794 			case 4:
2795 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2796 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2797 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2798 						 NUM_BANKS(ADDR_SURF_16_BANK));
2799 				break;
2800 			case 5:
2801 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2802 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2803 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2804 						 NUM_BANKS(ADDR_SURF_8_BANK));
2805 				break;
2806 			case 6:
2807 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2809 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2810 						 NUM_BANKS(ADDR_SURF_4_BANK));
2811 				break;
2812 			case 8:
2813 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2814 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2815 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2816 						 NUM_BANKS(ADDR_SURF_16_BANK));
2817 				break;
2818 			case 9:
2819 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2820 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2821 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2822 						 NUM_BANKS(ADDR_SURF_16_BANK));
2823 				break;
2824 			case 10:
2825 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2827 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2828 						 NUM_BANKS(ADDR_SURF_16_BANK));
2829 				break;
2830 			case 11:
2831 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2833 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2834 						 NUM_BANKS(ADDR_SURF_16_BANK));
2835 				break;
2836 			case 12:
2837 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840 						 NUM_BANKS(ADDR_SURF_16_BANK));
2841 				break;
2842 			case 13:
2843 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2845 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2846 						 NUM_BANKS(ADDR_SURF_8_BANK));
2847 				break;
2848 			case 14:
2849 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2852 						 NUM_BANKS(ADDR_SURF_4_BANK));
2853 				break;
2854 			default:
2855 				gb_tile_moden = 0;
2856 				break;
2857 			}
2858 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2859 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2860 		}
2861 	} else if (num_pipe_configs == 2) {
2862 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2863 			switch (reg_offset) {
2864 			case 0:
2865 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2866 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2867 						 PIPE_CONFIG(ADDR_SURF_P2) |
2868 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2869 				break;
2870 			case 1:
2871 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2873 						 PIPE_CONFIG(ADDR_SURF_P2) |
2874 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2875 				break;
2876 			case 2:
2877 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2879 						 PIPE_CONFIG(ADDR_SURF_P2) |
2880 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2881 				break;
2882 			case 3:
2883 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2885 						 PIPE_CONFIG(ADDR_SURF_P2) |
2886 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2887 				break;
2888 			case 4:
2889 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891 						 PIPE_CONFIG(ADDR_SURF_P2) |
2892 						 TILE_SPLIT(split_equal_to_row_size));
2893 				break;
2894 			case 5:
2895 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2896 						 PIPE_CONFIG(ADDR_SURF_P2) |
2897 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2898 				break;
2899 			case 6:
2900 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2901 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902 						 PIPE_CONFIG(ADDR_SURF_P2) |
2903 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2904 				break;
2905 			case 7:
2906 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2907 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2908 						 PIPE_CONFIG(ADDR_SURF_P2) |
2909 						 TILE_SPLIT(split_equal_to_row_size));
2910 				break;
2911 			case 8:
2912 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2913 						PIPE_CONFIG(ADDR_SURF_P2);
2914 				break;
2915 			case 9:
2916 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2918 						 PIPE_CONFIG(ADDR_SURF_P2));
2919 				break;
2920 			case 10:
2921 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2922 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923 						 PIPE_CONFIG(ADDR_SURF_P2) |
2924 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925 				break;
2926 			case 11:
2927 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2928 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2929 						 PIPE_CONFIG(ADDR_SURF_P2) |
2930 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2931 				break;
2932 			case 12:
2933 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2934 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2935 						 PIPE_CONFIG(ADDR_SURF_P2) |
2936 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2937 				break;
2938 			case 13:
2939 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2940 						 PIPE_CONFIG(ADDR_SURF_P2) |
2941 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2942 				break;
2943 			case 14:
2944 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2945 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2946 						 PIPE_CONFIG(ADDR_SURF_P2) |
2947 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2948 				break;
2949 			case 16:
2950 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2951 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2952 						 PIPE_CONFIG(ADDR_SURF_P2) |
2953 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2954 				break;
2955 			case 17:
2956 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2957 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2958 						 PIPE_CONFIG(ADDR_SURF_P2) |
2959 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2960 				break;
2961 			case 27:
2962 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2963 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2964 						 PIPE_CONFIG(ADDR_SURF_P2));
2965 				break;
2966 			case 28:
2967 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2968 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2969 						 PIPE_CONFIG(ADDR_SURF_P2) |
2970 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2971 				break;
2972 			case 29:
2973 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2974 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2975 						 PIPE_CONFIG(ADDR_SURF_P2) |
2976 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2977 				break;
2978 			case 30:
2979 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2980 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2981 						 PIPE_CONFIG(ADDR_SURF_P2) |
2982 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2983 				break;
2984 			default:
2985 				gb_tile_moden = 0;
2986 				break;
2987 			}
2988 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2989 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2990 		}
2991 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2992 			switch (reg_offset) {
2993 			case 0:
2994 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2995 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2996 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2997 						 NUM_BANKS(ADDR_SURF_16_BANK));
2998 				break;
2999 			case 1:
3000 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3001 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003 						 NUM_BANKS(ADDR_SURF_16_BANK));
3004 				break;
3005 			case 2:
3006 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3007 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3008 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3009 						 NUM_BANKS(ADDR_SURF_16_BANK));
3010 				break;
3011 			case 3:
3012 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3015 						 NUM_BANKS(ADDR_SURF_16_BANK));
3016 				break;
3017 			case 4:
3018 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3019 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3020 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3021 						 NUM_BANKS(ADDR_SURF_16_BANK));
3022 				break;
3023 			case 5:
3024 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3026 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027 						 NUM_BANKS(ADDR_SURF_16_BANK));
3028 				break;
3029 			case 6:
3030 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3032 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3033 						 NUM_BANKS(ADDR_SURF_8_BANK));
3034 				break;
3035 			case 8:
3036 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3037 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3038 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3039 						 NUM_BANKS(ADDR_SURF_16_BANK));
3040 				break;
3041 			case 9:
3042 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3043 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3044 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3045 						 NUM_BANKS(ADDR_SURF_16_BANK));
3046 				break;
3047 			case 10:
3048 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3049 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3050 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3051 						 NUM_BANKS(ADDR_SURF_16_BANK));
3052 				break;
3053 			case 11:
3054 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3055 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3056 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3057 						 NUM_BANKS(ADDR_SURF_16_BANK));
3058 				break;
3059 			case 12:
3060 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3062 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3063 						 NUM_BANKS(ADDR_SURF_16_BANK));
3064 				break;
3065 			case 13:
3066 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3067 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3068 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3069 						 NUM_BANKS(ADDR_SURF_16_BANK));
3070 				break;
3071 			case 14:
3072 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3073 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3074 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3075 						 NUM_BANKS(ADDR_SURF_8_BANK));
3076 				break;
3077 			default:
3078 				gb_tile_moden = 0;
3079 				break;
3080 			}
3081 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3082 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3083 		}
3084 	} else
3085 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3086 }
3087 
3088 /**
3089  * cik_select_se_sh - select which SE, SH to address
3090  *
3091  * @rdev: radeon_device pointer
3092  * @se_num: shader engine to address
3093  * @sh_num: sh block to address
3094  *
3095  * Select which SE, SH combinations to address. Certain
3096  * registers are instanced per SE or SH.  0xffffffff means
3097  * broadcast to all SEs or SHs (CIK).
3098  */
3099 static void cik_select_se_sh(struct radeon_device *rdev,
3100 			     u32 se_num, u32 sh_num)
3101 {
3102 	u32 data = INSTANCE_BROADCAST_WRITES;
3103 
3104 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3105 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3106 	else if (se_num == 0xffffffff)
3107 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3108 	else if (sh_num == 0xffffffff)
3109 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3110 	else
3111 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3112 	WREG32(GRBM_GFX_INDEX, data);
3113 }
3114 
3115 /**
3116  * cik_create_bitmask - create a bitmask
3117  *
3118  * @bit_width: length of the mask
3119  *
3120  * create a variable length bit mask (CIK).
3121  * Returns the bitmask.
3122  */
3123 static u32 cik_create_bitmask(u32 bit_width)
3124 {
3125 	u32 i, mask = 0;
3126 
3127 	for (i = 0; i < bit_width; i++) {
3128 		mask <<= 1;
3129 		mask |= 1;
3130 	}
3131 	return mask;
3132 }
3133 
3134 /**
3135  * cik_get_rb_disabled - computes the mask of disabled RBs
3136  *
3137  * @rdev: radeon_device pointer
3138  * @max_rb_num: max RBs (render backends) for the asic
3139  * @se_num: number of SEs (shader engines) for the asic
3140  * @sh_per_se: number of SH blocks per SE for the asic
3141  *
3142  * Calculates the bitmask of disabled RBs (CIK).
3143  * Returns the disabled RB bitmask.
3144  */
3145 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3146 			      u32 max_rb_num_per_se,
3147 			      u32 sh_per_se)
3148 {
3149 	u32 data, mask;
3150 
3151 	data = RREG32(CC_RB_BACKEND_DISABLE);
3152 	if (data & 1)
3153 		data &= BACKEND_DISABLE_MASK;
3154 	else
3155 		data = 0;
3156 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3157 
3158 	data >>= BACKEND_DISABLE_SHIFT;
3159 
3160 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3161 
3162 	return data & mask;
3163 }
3164 
3165 /**
3166  * cik_setup_rb - setup the RBs on the asic
3167  *
3168  * @rdev: radeon_device pointer
3169  * @se_num: number of SEs (shader engines) for the asic
3170  * @sh_per_se: number of SH blocks per SE for the asic
3171  * @max_rb_num: max RBs (render backends) for the asic
3172  *
3173  * Configures per-SE/SH RB registers (CIK).
3174  */
3175 static void cik_setup_rb(struct radeon_device *rdev,
3176 			 u32 se_num, u32 sh_per_se,
3177 			 u32 max_rb_num_per_se)
3178 {
3179 	int i, j;
3180 	u32 data, mask;
3181 	u32 disabled_rbs = 0;
3182 	u32 enabled_rbs = 0;
3183 
3184 	for (i = 0; i < se_num; i++) {
3185 		for (j = 0; j < sh_per_se; j++) {
3186 			cik_select_se_sh(rdev, i, j);
3187 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3188 			if (rdev->family == CHIP_HAWAII)
3189 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3190 			else
3191 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3192 		}
3193 	}
3194 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3195 
3196 	mask = 1;
3197 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3198 		if (!(disabled_rbs & mask))
3199 			enabled_rbs |= mask;
3200 		mask <<= 1;
3201 	}
3202 
3203 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3204 
3205 	for (i = 0; i < se_num; i++) {
3206 		cik_select_se_sh(rdev, i, 0xffffffff);
3207 		data = 0;
3208 		for (j = 0; j < sh_per_se; j++) {
3209 			switch (enabled_rbs & 3) {
3210 			case 0:
3211 				if (j == 0)
3212 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3213 				else
3214 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3215 				break;
3216 			case 1:
3217 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3218 				break;
3219 			case 2:
3220 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3221 				break;
3222 			case 3:
3223 			default:
3224 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3225 				break;
3226 			}
3227 			enabled_rbs >>= 2;
3228 		}
3229 		WREG32(PA_SC_RASTER_CONFIG, data);
3230 	}
3231 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3232 }
3233 
3234 /**
3235  * cik_gpu_init - setup the 3D engine
3236  *
3237  * @rdev: radeon_device pointer
3238  *
3239  * Configures the 3D engine and tiling configuration
3240  * registers so that the 3D engine is usable.
3241  */
3242 static void cik_gpu_init(struct radeon_device *rdev)
3243 {
3244 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3245 	u32 mc_shared_chmap, mc_arb_ramcfg;
3246 	u32 hdp_host_path_cntl;
3247 	u32 tmp;
3248 	int i, j;
3249 
3250 	switch (rdev->family) {
3251 	case CHIP_BONAIRE:
3252 		rdev->config.cik.max_shader_engines = 2;
3253 		rdev->config.cik.max_tile_pipes = 4;
3254 		rdev->config.cik.max_cu_per_sh = 7;
3255 		rdev->config.cik.max_sh_per_se = 1;
3256 		rdev->config.cik.max_backends_per_se = 2;
3257 		rdev->config.cik.max_texture_channel_caches = 4;
3258 		rdev->config.cik.max_gprs = 256;
3259 		rdev->config.cik.max_gs_threads = 32;
3260 		rdev->config.cik.max_hw_contexts = 8;
3261 
3262 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3263 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3264 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3265 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3266 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3267 		break;
3268 	case CHIP_HAWAII:
3269 		rdev->config.cik.max_shader_engines = 4;
3270 		rdev->config.cik.max_tile_pipes = 16;
3271 		rdev->config.cik.max_cu_per_sh = 11;
3272 		rdev->config.cik.max_sh_per_se = 1;
3273 		rdev->config.cik.max_backends_per_se = 4;
3274 		rdev->config.cik.max_texture_channel_caches = 16;
3275 		rdev->config.cik.max_gprs = 256;
3276 		rdev->config.cik.max_gs_threads = 32;
3277 		rdev->config.cik.max_hw_contexts = 8;
3278 
3279 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3280 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3281 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3282 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3283 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3284 		break;
3285 	case CHIP_KAVERI:
3286 		rdev->config.cik.max_shader_engines = 1;
3287 		rdev->config.cik.max_tile_pipes = 4;
3288 		if ((rdev->pdev->device == 0x1304) ||
3289 		    (rdev->pdev->device == 0x1305) ||
3290 		    (rdev->pdev->device == 0x130C) ||
3291 		    (rdev->pdev->device == 0x130F) ||
3292 		    (rdev->pdev->device == 0x1310) ||
3293 		    (rdev->pdev->device == 0x1311) ||
3294 		    (rdev->pdev->device == 0x131C)) {
3295 			rdev->config.cik.max_cu_per_sh = 8;
3296 			rdev->config.cik.max_backends_per_se = 2;
3297 		} else if ((rdev->pdev->device == 0x1309) ||
3298 			   (rdev->pdev->device == 0x130A) ||
3299 			   (rdev->pdev->device == 0x130D) ||
3300 			   (rdev->pdev->device == 0x1313) ||
3301 			   (rdev->pdev->device == 0x131D)) {
3302 			rdev->config.cik.max_cu_per_sh = 6;
3303 			rdev->config.cik.max_backends_per_se = 2;
3304 		} else if ((rdev->pdev->device == 0x1306) ||
3305 			   (rdev->pdev->device == 0x1307) ||
3306 			   (rdev->pdev->device == 0x130B) ||
3307 			   (rdev->pdev->device == 0x130E) ||
3308 			   (rdev->pdev->device == 0x1315) ||
3309 			   (rdev->pdev->device == 0x1318) ||
3310 			   (rdev->pdev->device == 0x131B)) {
3311 			rdev->config.cik.max_cu_per_sh = 4;
3312 			rdev->config.cik.max_backends_per_se = 1;
3313 		} else {
3314 			rdev->config.cik.max_cu_per_sh = 3;
3315 			rdev->config.cik.max_backends_per_se = 1;
3316 		}
3317 		rdev->config.cik.max_sh_per_se = 1;
3318 		rdev->config.cik.max_texture_channel_caches = 4;
3319 		rdev->config.cik.max_gprs = 256;
3320 		rdev->config.cik.max_gs_threads = 16;
3321 		rdev->config.cik.max_hw_contexts = 8;
3322 
3323 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3324 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3325 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3326 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3327 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3328 		break;
3329 	case CHIP_KABINI:
3330 	case CHIP_MULLINS:
3331 	default:
3332 		rdev->config.cik.max_shader_engines = 1;
3333 		rdev->config.cik.max_tile_pipes = 2;
3334 		rdev->config.cik.max_cu_per_sh = 2;
3335 		rdev->config.cik.max_sh_per_se = 1;
3336 		rdev->config.cik.max_backends_per_se = 1;
3337 		rdev->config.cik.max_texture_channel_caches = 2;
3338 		rdev->config.cik.max_gprs = 256;
3339 		rdev->config.cik.max_gs_threads = 16;
3340 		rdev->config.cik.max_hw_contexts = 8;
3341 
3342 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3343 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3344 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3345 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3346 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3347 		break;
3348 	}
3349 
3350 	/* Initialize HDP */
3351 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3352 		WREG32((0x2c14 + j), 0x00000000);
3353 		WREG32((0x2c18 + j), 0x00000000);
3354 		WREG32((0x2c1c + j), 0x00000000);
3355 		WREG32((0x2c20 + j), 0x00000000);
3356 		WREG32((0x2c24 + j), 0x00000000);
3357 	}
3358 
3359 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3360 
3361 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3362 
3363 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3364 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3365 
3366 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3367 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3368 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3369 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3370 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3371 		rdev->config.cik.mem_row_size_in_kb = 4;
3372 	/* XXX use MC settings? */
3373 	rdev->config.cik.shader_engine_tile_size = 32;
3374 	rdev->config.cik.num_gpus = 1;
3375 	rdev->config.cik.multi_gpu_tile_size = 64;
3376 
3377 	/* fix up row size */
3378 	gb_addr_config &= ~ROW_SIZE_MASK;
3379 	switch (rdev->config.cik.mem_row_size_in_kb) {
3380 	case 1:
3381 	default:
3382 		gb_addr_config |= ROW_SIZE(0);
3383 		break;
3384 	case 2:
3385 		gb_addr_config |= ROW_SIZE(1);
3386 		break;
3387 	case 4:
3388 		gb_addr_config |= ROW_SIZE(2);
3389 		break;
3390 	}
3391 
3392 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3393 	 * not have bank info, so create a custom tiling dword.
3394 	 * bits 3:0   num_pipes
3395 	 * bits 7:4   num_banks
3396 	 * bits 11:8  group_size
3397 	 * bits 15:12 row_size
3398 	 */
3399 	rdev->config.cik.tile_config = 0;
3400 	switch (rdev->config.cik.num_tile_pipes) {
3401 	case 1:
3402 		rdev->config.cik.tile_config |= (0 << 0);
3403 		break;
3404 	case 2:
3405 		rdev->config.cik.tile_config |= (1 << 0);
3406 		break;
3407 	case 4:
3408 		rdev->config.cik.tile_config |= (2 << 0);
3409 		break;
3410 	case 8:
3411 	default:
3412 		/* XXX what about 12? */
3413 		rdev->config.cik.tile_config |= (3 << 0);
3414 		break;
3415 	}
3416 	rdev->config.cik.tile_config |=
3417 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3418 	rdev->config.cik.tile_config |=
3419 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3420 	rdev->config.cik.tile_config |=
3421 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3422 
3423 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3424 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3425 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3426 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3427 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3428 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3429 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3430 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3431 
3432 	cik_tiling_mode_table_init(rdev);
3433 
3434 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3435 		     rdev->config.cik.max_sh_per_se,
3436 		     rdev->config.cik.max_backends_per_se);
3437 
3438 	rdev->config.cik.active_cus = 0;
3439 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3440 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3441 			rdev->config.cik.active_cus +=
3442 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3443 		}
3444 	}
3445 
3446 	/* set HW defaults for 3D engine */
3447 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3448 
3449 	WREG32(SX_DEBUG_1, 0x20);
3450 
3451 	WREG32(TA_CNTL_AUX, 0x00010000);
3452 
3453 	tmp = RREG32(SPI_CONFIG_CNTL);
3454 	tmp |= 0x03000000;
3455 	WREG32(SPI_CONFIG_CNTL, tmp);
3456 
3457 	WREG32(SQ_CONFIG, 1);
3458 
3459 	WREG32(DB_DEBUG, 0);
3460 
3461 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3462 	tmp |= 0x00000400;
3463 	WREG32(DB_DEBUG2, tmp);
3464 
3465 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3466 	tmp |= 0x00020200;
3467 	WREG32(DB_DEBUG3, tmp);
3468 
3469 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3470 	tmp |= 0x00018208;
3471 	WREG32(CB_HW_CONTROL, tmp);
3472 
3473 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3474 
3475 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3476 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3477 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3478 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3479 
3480 	WREG32(VGT_NUM_INSTANCES, 1);
3481 
3482 	WREG32(CP_PERFMON_CNTL, 0);
3483 
3484 	WREG32(SQ_CONFIG, 0);
3485 
3486 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3487 					  FORCE_EOV_MAX_REZ_CNT(255)));
3488 
3489 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3490 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3491 
3492 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3493 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3494 
3495 	tmp = RREG32(HDP_MISC_CNTL);
3496 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3497 	WREG32(HDP_MISC_CNTL, tmp);
3498 
3499 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3500 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3501 
3502 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3503 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3504 
3505 	udelay(50);
3506 }
3507 
3508 /*
3509  * GPU scratch registers helpers function.
3510  */
3511 /**
3512  * cik_scratch_init - setup driver info for CP scratch regs
3513  *
3514  * @rdev: radeon_device pointer
3515  *
3516  * Set up the number and offset of the CP scratch registers.
3517  * NOTE: use of CP scratch registers is a legacy inferface and
3518  * is not used by default on newer asics (r6xx+).  On newer asics,
3519  * memory buffers are used for fences rather than scratch regs.
3520  */
3521 static void cik_scratch_init(struct radeon_device *rdev)
3522 {
3523 	int i;
3524 
3525 	rdev->scratch.num_reg = 7;
3526 	rdev->scratch.reg_base = SCRATCH_REG0;
3527 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3528 		rdev->scratch.free[i] = true;
3529 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3530 	}
3531 }
3532 
3533 /**
3534  * cik_ring_test - basic gfx ring test
3535  *
3536  * @rdev: radeon_device pointer
3537  * @ring: radeon_ring structure holding ring information
3538  *
3539  * Allocate a scratch register and write to it using the gfx ring (CIK).
3540  * Provides a basic gfx ring test to verify that the ring is working.
3541  * Used by cik_cp_gfx_resume();
3542  * Returns 0 on success, error on failure.
3543  */
3544 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3545 {
3546 	uint32_t scratch;
3547 	uint32_t tmp = 0;
3548 	unsigned i;
3549 	int r;
3550 
3551 	r = radeon_scratch_get(rdev, &scratch);
3552 	if (r) {
3553 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3554 		return r;
3555 	}
3556 	WREG32(scratch, 0xCAFEDEAD);
3557 	r = radeon_ring_lock(rdev, ring, 3);
3558 	if (r) {
3559 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3560 		radeon_scratch_free(rdev, scratch);
3561 		return r;
3562 	}
3563 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3564 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3565 	radeon_ring_write(ring, 0xDEADBEEF);
3566 	radeon_ring_unlock_commit(rdev, ring, false);
3567 
3568 	for (i = 0; i < rdev->usec_timeout; i++) {
3569 		tmp = RREG32(scratch);
3570 		if (tmp == 0xDEADBEEF)
3571 			break;
3572 		DRM_UDELAY(1);
3573 	}
3574 	if (i < rdev->usec_timeout) {
3575 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3576 	} else {
3577 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3578 			  ring->idx, scratch, tmp);
3579 		r = -EINVAL;
3580 	}
3581 	radeon_scratch_free(rdev, scratch);
3582 	return r;
3583 }
3584 
3585 /**
3586  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3587  *
3588  * @rdev: radeon_device pointer
3589  * @ridx: radeon ring index
3590  *
3591  * Emits an hdp flush on the cp.
3592  */
3593 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3594 				       int ridx)
3595 {
3596 	struct radeon_ring *ring = &rdev->ring[ridx];
3597 	u32 ref_and_mask;
3598 
3599 	switch (ring->idx) {
3600 	case CAYMAN_RING_TYPE_CP1_INDEX:
3601 	case CAYMAN_RING_TYPE_CP2_INDEX:
3602 	default:
3603 		switch (ring->me) {
3604 		case 0:
3605 			ref_and_mask = CP2 << ring->pipe;
3606 			break;
3607 		case 1:
3608 			ref_and_mask = CP6 << ring->pipe;
3609 			break;
3610 		default:
3611 			return;
3612 		}
3613 		break;
3614 	case RADEON_RING_TYPE_GFX_INDEX:
3615 		ref_and_mask = CP0;
3616 		break;
3617 	}
3618 
3619 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3620 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3621 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3622 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3623 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3624 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3625 	radeon_ring_write(ring, ref_and_mask);
3626 	radeon_ring_write(ring, ref_and_mask);
3627 	radeon_ring_write(ring, 0x20); /* poll interval */
3628 }
3629 
3630 /**
3631  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3632  *
3633  * @rdev: radeon_device pointer
3634  * @fence: radeon fence object
3635  *
3636  * Emits a fence sequnce number on the gfx ring and flushes
3637  * GPU caches.
3638  */
3639 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3640 			     struct radeon_fence *fence)
3641 {
3642 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3643 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3644 
3645 	/* EVENT_WRITE_EOP - flush caches, send int */
3646 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3647 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3648 				 EOP_TC_ACTION_EN |
3649 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3650 				 EVENT_INDEX(5)));
3651 	radeon_ring_write(ring, addr & 0xfffffffc);
3652 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3653 	radeon_ring_write(ring, fence->seq);
3654 	radeon_ring_write(ring, 0);
3655 }
3656 
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.  Compute queues use RELEASE_MEM instead of the gfx
 * ring's EVENT_WRITE_EOP packet.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1) = write 32-bit seq, INT_SEL(2) = irq on write confirm */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3684 
/**
 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring buffer object
 * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
 *
 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
 * from running ahead of semaphore waits.
 *
 * Returns true, as the MEM_SEMAPHORE packet is always available on CIK.
 */
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, lower_32_bits(addr));
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);

	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
		/* Prevent the PFP from running ahead of the semaphore wait */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}

	return true;
}
3716 
/**
 * cik_copy_cpdma - copy pages using the CP DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object
 *
 * Copy GPU paging using the CP DMA engine (CIK+).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 *
 * Returns 0 on success; on failure the ring is undone, the
 * semaphore is freed and the error code is returned.
 */
int cik_copy_cpdma(struct radeon_device *rdev,
		   uint64_t src_offset, uint64_t dst_offset,
		   unsigned num_gpu_pages,
		   struct radeon_fence **fence)
{
	struct radeon_semaphore *sem = NULL;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	r = radeon_semaphore_create(rdev, &sem);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}

	/* DMA_DATA moves at most 0x1fffff bytes per packet, so split
	 * the copy into that many loops.
	 */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per DMA_DATA packet plus 18 dwords of sync/fence overhead */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	radeon_semaphore_sync_to(sem, *fence);
	radeon_semaphore_sync_rings(rdev, sem, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* only the last packet needs to stall the CP for completion */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_semaphore_free(rdev, &sem, *fence);

	return r;
}
3791 
3792 /*
3793  * IB stuff
3794  */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		/* record the post-IB rptr so hangs can be diagnosed; the
		 * "+ 3 + 4" / "+ 5 + 4" offsets account for the size of
		 * this write packet plus the 4-dword IB packet below.
		 */
		if (ring->rptr_save_reg) {
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* vm id goes in bits 31:24 of the control word */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3850 
/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
 * The IB writes a magic value to a scratch register; the test then
 * polls the register to verify the write landed.
 * Provides a basic gfx ring test to verify that IBs are working.
 * Returns 0 on success, error on failure.
 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed with a sentinel so a stale 0xDEADBEEF can't fake success */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* 3-dword IB: write 0xDEADBEEF to the scratch register */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	/* the fence signaled; give the write a bounded time to become visible */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
3916 
3917 /*
3918  * CP.
 * On CIK, gfx and compute now have independent command processors.
3920  *
3921  * GFX
3922  * Gfx consists of a single ring and can process both gfx jobs and
3923  * compute jobs.  The gfx CP consists of three microengines (ME):
3924  * PFP - Pre-Fetch Parser
3925  * ME - Micro Engine
3926  * CE - Constant Engine
3927  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3929  * used by the DE so that they can be loaded into cache in parallel
3930  * while the DE is processing state update packets.
3931  *
3932  * Compute
3933  * The compute CP consists of two microengines (ME):
3934  * MEC1 - Compute MicroEngine 1
3935  * MEC2 - Compute MicroEngine 2
3936  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3937  * The queues are exposed to userspace and are programmed directly
3938  * by the compute runtime.
3939  */
/**
 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the gfx MEs (PFP, ME and CE).
 */
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_ME_CNTL, 0);
	else {
		/* shrink active VRAM first if the gfx ring is the copy ring,
		 * since buffer moves will no longer be possible once halted
		 */
		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	}
	/* give the MEs time to settle into the new state */
	udelay(50);
}
3960 
/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.  The CP is halted first;
 * each engine's ucode is streamed in through its DATA register
 * after resetting the corresponding ADDR register to 0.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	cik_cp_gfx_enable(rdev, false);

	/* PFP */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	/* leave all the ucode address registers at 0 */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
4006 
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* clear state dwords plus 17 dwords of init packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the golden register state generated from clearstate_ci.h */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4067 
/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring (halt the MEs) and tear down the driver
 * ring info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
4081 
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size (log2 of the size in 8-dword units) */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* write the final CNTL value (clears RB_RPTR_WR_ENA again) */
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* restore full VRAM visibility now that buffer moves work again */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4157 
4158 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4159 		     struct radeon_ring *ring)
4160 {
4161 	u32 rptr;
4162 
4163 	if (rdev->wb.enabled)
4164 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4165 	else
4166 		rptr = RREG32(CP_RB0_RPTR);
4167 
4168 	return rptr;
4169 }
4170 
4171 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4172 		     struct radeon_ring *ring)
4173 {
4174 	u32 wptr;
4175 
4176 	wptr = RREG32(CP_RB0_WPTR);
4177 
4178 	return wptr;
4179 }
4180 
/* Commit the gfx ring write pointer to the hardware. */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back to flush the posted register write */
	(void)RREG32(CP_RB0_WPTR);
}
4187 
/* Fetch a compute ring's read pointer.  Uses the writeback copy when
 * enabled; otherwise selects the ring's HQD via SRBM (under srbm_mutex)
 * and reads the register directly.
 */
u32 cik_compute_get_rptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = rdev->wb.wb[ring->rptr_offs/4];
	} else {
		spin_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		/* always restore the default (0,0,0,0) selection */
		cik_srbm_select(rdev, 0, 0, 0, 0);
		spin_unlock(&rdev->srbm_mutex);
	}

	return rptr;
}
4205 
/* Fetch a compute ring's write pointer.  Uses the writeback copy when
 * enabled; otherwise selects the ring's HQD via SRBM (under srbm_mutex)
 * and reads the register directly.
 */
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		spin_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		/* always restore the default (0,0,0,0) selection */
		cik_srbm_select(rdev, 0, 0, 0, 0);
		spin_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4224 
/* Commit a compute ring's write pointer: update the writeback shadow
 * and ring the queue's doorbell to notify the CP.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4232 
/**
 * cik_cp_compute_enable - enable/disable the compute CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the compute MEs (MEC1 and MEC2).
 */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);
	else {
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}
	/* give the MECs time to settle into the new state */
	udelay(50);
}
4252 
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.  Only Kaveri has a second MEC;
 * both MECs share the same ucode image.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	cik_cp_compute_enable(rdev, false);

	/* MEC1 */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
4289 
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues (unhalt the MECs); queue/HQD programming
 * is done separately in cik_cp_compute_resume().
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	cik_cp_compute_enable(rdev, true);

	return 0;
}
4304 
/**
 * cik_cp_compute_fini - stop the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute queues and tear down the driver queue
 * info, freeing each ring's MQD buffer object.
 */
static void cik_cp_compute_fini(struct radeon_device *rdev)
{
	int i, idx, r;

	cik_cp_compute_enable(rdev, false);

	/* two compute rings: CP1 and CP2 */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj) {
			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);

			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
			rdev->ring[idx].mqd_obj = NULL;
		}
	}
}
4338 
/**
 * cik_mec_fini - tear down the MEC state
 *
 * @rdev: radeon_device pointer
 *
 * Unpins and frees the HPD EOP buffer object allocated by
 * cik_mec_init().  Safe to call when the bo was never created.
 */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
4354 
4355 #define MEC_HPD_SIZE 2048
4356 
/**
 * cik_mec_init - init the compute MEC state
 *
 * @rdev: radeon_device pointer
 *
 * Sets up the MEC/pipe/queue topology for the chip and allocates,
 * pins and zeroes the GTT buffer holding the per-pipe HPD EOP areas.
 * Returns 0 on success, error on failure (state cleaned up via
 * cik_mec_fini()).
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* two MEC_HPD_SIZE areas per pipe */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4412 
/* CPU-side shadow of the per-queue HQD (hardware queue descriptor)
 * register block; field order mirrors the CP_HQD_*/CP_MQD_* registers
 * programmed in cik_cp_compute_resume().
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4451 
/* Memory queue descriptor (MQD) layout for Bonaire-class compute queues.
 * One MQD bo per compute ring is filled in by cik_cp_compute_resume();
 * layout must match what the MEC firmware expects — do not reorder.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4479 
4480 /**
4481  * cik_cp_compute_resume - setup the compute queue registers
4482  *
4483  * @rdev: radeon_device pointer
4484  *
4485  * Program the compute queues and test them to make sure they
4486  * are working.
4487  * Returns 0 for success, error for failure.
4488  */
4489 static int cik_cp_compute_resume(struct radeon_device *rdev)
4490 {
4491 	int r, i, j, idx;
4492 	u32 tmp;
4493 	bool use_doorbell = true;
4494 	u64 hqd_gpu_addr;
4495 	u64 mqd_gpu_addr;
4496 	u64 eop_gpu_addr;
4497 	u64 wb_gpu_addr;
4498 	u32 *buf;
4499 	struct bonaire_mqd *mqd;
4500 
4501 	r = cik_cp_compute_start(rdev);
4502 	if (r)
4503 		return r;
4504 
4505 	/* fix up chicken bits */
4506 	tmp = RREG32(CP_CPF_DEBUG);
4507 	tmp |= (1 << 23);
4508 	WREG32(CP_CPF_DEBUG, tmp);
4509 
4510 	/* init the pipes */
4511 	spin_lock(&rdev->srbm_mutex);
4512 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4513 		int me = (i < 4) ? 1 : 2;
4514 		int pipe = (i < 4) ? i : (i - 4);
4515 
4516 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4517 
4518 		cik_srbm_select(rdev, me, pipe, 0, 0);
4519 
4520 		/* write the EOP addr */
4521 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4522 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4523 
4524 		/* set the VMID assigned */
4525 		WREG32(CP_HPD_EOP_VMID, 0);
4526 
4527 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4528 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4529 		tmp &= ~EOP_SIZE_MASK;
4530 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4531 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4532 	}
4533 	cik_srbm_select(rdev, 0, 0, 0, 0);
4534 	spin_unlock(&rdev->srbm_mutex);
4535 
4536 	/* init the queues.  Just two for now. */
4537 	for (i = 0; i < 2; i++) {
4538 		if (i == 0)
4539 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4540 		else
4541 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4542 
4543 		if (rdev->ring[idx].mqd_obj == NULL) {
4544 			r = radeon_bo_create(rdev,
4545 					     sizeof(struct bonaire_mqd),
4546 					     PAGE_SIZE, true,
4547 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4548 					     &rdev->ring[idx].mqd_obj);
4549 			if (r) {
4550 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4551 				return r;
4552 			}
4553 		}
4554 
4555 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4556 		if (unlikely(r != 0)) {
4557 			cik_cp_compute_fini(rdev);
4558 			return r;
4559 		}
4560 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4561 				  &mqd_gpu_addr);
4562 		if (r) {
4563 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4564 			cik_cp_compute_fini(rdev);
4565 			return r;
4566 		}
4567 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4568 		if (r) {
4569 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4570 			cik_cp_compute_fini(rdev);
4571 			return r;
4572 		}
4573 
4574 		/* init the mqd struct */
4575 		memset(buf, 0, sizeof(struct bonaire_mqd));
4576 
4577 		mqd = (struct bonaire_mqd *)buf;
4578 		mqd->header = 0xC0310800;
4579 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4580 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4581 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4582 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4583 
4584 		spin_lock(&rdev->srbm_mutex);
4585 		cik_srbm_select(rdev, rdev->ring[idx].me,
4586 				rdev->ring[idx].pipe,
4587 				rdev->ring[idx].queue, 0);
4588 
4589 		/* disable wptr polling */
4590 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4591 		tmp &= ~WPTR_POLL_EN;
4592 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4593 
4594 		/* enable doorbell? */
4595 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4596 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4597 		if (use_doorbell)
4598 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4599 		else
4600 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4601 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4602 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4603 
4604 		/* disable the queue if it's active */
4605 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4606 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4607 		mqd->queue_state.cp_hqd_pq_wptr= 0;
4608 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4609 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4610 			for (j = 0; j < rdev->usec_timeout; j++) {
4611 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4612 					break;
4613 				udelay(1);
4614 			}
4615 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4616 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4617 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4618 		}
4619 
4620 		/* set the pointer to the MQD */
4621 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4622 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4623 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4624 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4625 		/* set MQD vmid to 0 */
4626 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4627 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4628 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4629 
4630 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4631 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4632 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4633 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4634 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4635 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4636 
4637 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4638 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4639 		mqd->queue_state.cp_hqd_pq_control &=
4640 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4641 
4642 		mqd->queue_state.cp_hqd_pq_control |=
4643 			order_base_2(rdev->ring[idx].ring_size / 8);
4644 		mqd->queue_state.cp_hqd_pq_control |=
4645 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4646 #ifdef __BIG_ENDIAN
4647 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4648 #endif
4649 		mqd->queue_state.cp_hqd_pq_control &=
4650 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4651 		mqd->queue_state.cp_hqd_pq_control |=
4652 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4653 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4654 
4655 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4656 		if (i == 0)
4657 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4658 		else
4659 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4660 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4661 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4662 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4663 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4664 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4665 
		/* set the wb address whether it's enabled or not */
4667 		if (i == 0)
4668 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4669 		else
4670 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4671 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4672 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4673 			upper_32_bits(wb_gpu_addr) & 0xffff;
4674 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4675 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4676 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4677 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4678 
4679 		/* enable the doorbell if requested */
4680 		if (use_doorbell) {
4681 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4682 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4683 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4684 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4685 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4686 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4687 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4688 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4689 
4690 		} else {
4691 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4692 		}
4693 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4694 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4695 
4696 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4697 		rdev->ring[idx].wptr = 0;
4698 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4699 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4700 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4701 
4702 		/* set the vmid for the queue */
4703 		mqd->queue_state.cp_hqd_vmid = 0;
4704 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4705 
4706 		/* activate the queue */
4707 		mqd->queue_state.cp_hqd_active = 1;
4708 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4709 
4710 		cik_srbm_select(rdev, 0, 0, 0, 0);
4711 		spin_unlock(&rdev->srbm_mutex);
4712 
4713 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4714 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4715 
4716 		rdev->ring[idx].ready = true;
4717 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4718 		if (r)
4719 			rdev->ring[idx].ready = false;
4720 	}
4721 
4722 	return 0;
4723 }
4724 
/**
 * cik_cp_enable - enable/disable both command processors
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the command processors
 *
 * Forwards the enable flag to the gfx CP and the compute MEC (CIK).
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4730 
/**
 * cik_cp_load_microcode - load the gfx and compute CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx CP microcode first, then the compute MEC microcode.
 * Returns 0 on success, or the first failing loader's error code.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r = cik_cp_gfx_load_microcode(rdev);

	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
4744 
/**
 * cik_cp_fini - tear down the gfx and compute command processors
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the gfx ring state first, then the compute ring state (CIK).
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4750 
/**
 * cik_cp_resume - load microcode and start the gfx and compute rings
 *
 * @rdev: radeon_device pointer
 *
 * Masks the GUI idle interrupt while the command processors are
 * (re)started and re-enables it once both resume paths succeed.
 * Returns 0 on success, error code on failure.
 *
 * NOTE(review): on any early error return the GUI idle interrupt is
 * left disabled — confirm callers treat a failed resume as fatal.
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
4772 
/**
 * cik_print_gpu_status_regs - dump the GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Dumps the GRBM/SRBM status registers, both SDMA engine status
 * registers and the CP busy/stalled status registers to the kernel
 * log.  Used when diagnosing a hang before/after a reset.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4812 
4813 /**
4814  * cik_gpu_check_soft_reset - check which blocks are busy
4815  *
4816  * @rdev: radeon_device pointer
4817  *
4818  * Check which blocks are busy and return the relevant reset
4819  * mask to be used by cik_gpu_soft_reset().
4820  * Returns a mask of the blocks to be reset.
4821  */
4822 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4823 {
4824 	u32 reset_mask = 0;
4825 	u32 tmp;
4826 
4827 	/* GRBM_STATUS */
4828 	tmp = RREG32(GRBM_STATUS);
4829 	if (tmp & (PA_BUSY | SC_BUSY |
4830 		   BCI_BUSY | SX_BUSY |
4831 		   TA_BUSY | VGT_BUSY |
4832 		   DB_BUSY | CB_BUSY |
4833 		   GDS_BUSY | SPI_BUSY |
4834 		   IA_BUSY | IA_BUSY_NO_DMA))
4835 		reset_mask |= RADEON_RESET_GFX;
4836 
4837 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4838 		reset_mask |= RADEON_RESET_CP;
4839 
4840 	/* GRBM_STATUS2 */
4841 	tmp = RREG32(GRBM_STATUS2);
4842 	if (tmp & RLC_BUSY)
4843 		reset_mask |= RADEON_RESET_RLC;
4844 
4845 	/* SDMA0_STATUS_REG */
4846 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4847 	if (!(tmp & SDMA_IDLE))
4848 		reset_mask |= RADEON_RESET_DMA;
4849 
4850 	/* SDMA1_STATUS_REG */
4851 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4852 	if (!(tmp & SDMA_IDLE))
4853 		reset_mask |= RADEON_RESET_DMA1;
4854 
4855 	/* SRBM_STATUS2 */
4856 	tmp = RREG32(SRBM_STATUS2);
4857 	if (tmp & SDMA_BUSY)
4858 		reset_mask |= RADEON_RESET_DMA;
4859 
4860 	if (tmp & SDMA1_BUSY)
4861 		reset_mask |= RADEON_RESET_DMA1;
4862 
4863 	/* SRBM_STATUS */
4864 	tmp = RREG32(SRBM_STATUS);
4865 
4866 	if (tmp & IH_BUSY)
4867 		reset_mask |= RADEON_RESET_IH;
4868 
4869 	if (tmp & SEM_BUSY)
4870 		reset_mask |= RADEON_RESET_SEM;
4871 
4872 	if (tmp & GRBM_RQ_PENDING)
4873 		reset_mask |= RADEON_RESET_GRBM;
4874 
4875 	if (tmp & VMC_BUSY)
4876 		reset_mask |= RADEON_RESET_VMC;
4877 
4878 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4879 		   MCC_BUSY | MCD_BUSY))
4880 		reset_mask |= RADEON_RESET_MC;
4881 
4882 	if (evergreen_is_display_hung(rdev))
4883 		reset_mask |= RADEON_RESET_DISPLAY;
4884 
4885 	/* Skip MC reset as it's mostly likely not hung, just busy */
4886 	if (reset_mask & RADEON_RESET_MC) {
4887 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4888 		reset_mask &= ~RADEON_RESET_MC;
4889 	}
4890 
4891 	return reset_mask;
4892 }
4893 
4894 /**
4895  * cik_gpu_soft_reset - soft reset GPU
4896  *
4897  * @rdev: radeon_device pointer
4898  * @reset_mask: mask of which blocks to reset
4899  *
4900  * Soft reset the blocks specified in @reset_mask.
4901  */
4902 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4903 {
4904 	struct evergreen_mc_save save;
4905 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4906 	u32 tmp;
4907 
4908 	if (reset_mask == 0)
4909 		return;
4910 
4911 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4912 
4913 	cik_print_gpu_status_regs(rdev);
4914 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4915 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4916 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4917 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4918 
4919 	/* disable CG/PG */
4920 	cik_fini_pg(rdev);
4921 	cik_fini_cg(rdev);
4922 
4923 	/* stop the rlc */
4924 	cik_rlc_stop(rdev);
4925 
4926 	/* Disable GFX parsing/prefetching */
4927 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4928 
4929 	/* Disable MEC parsing/prefetching */
4930 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4931 
4932 	if (reset_mask & RADEON_RESET_DMA) {
4933 		/* sdma0 */
4934 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4935 		tmp |= SDMA_HALT;
4936 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4937 	}
4938 	if (reset_mask & RADEON_RESET_DMA1) {
4939 		/* sdma1 */
4940 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4941 		tmp |= SDMA_HALT;
4942 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4943 	}
4944 
4945 	evergreen_mc_stop(rdev, &save);
4946 	if (evergreen_mc_wait_for_idle(rdev)) {
4947 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4948 	}
4949 
4950 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4951 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4952 
4953 	if (reset_mask & RADEON_RESET_CP) {
4954 		grbm_soft_reset |= SOFT_RESET_CP;
4955 
4956 		srbm_soft_reset |= SOFT_RESET_GRBM;
4957 	}
4958 
4959 	if (reset_mask & RADEON_RESET_DMA)
4960 		srbm_soft_reset |= SOFT_RESET_SDMA;
4961 
4962 	if (reset_mask & RADEON_RESET_DMA1)
4963 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4964 
4965 	if (reset_mask & RADEON_RESET_DISPLAY)
4966 		srbm_soft_reset |= SOFT_RESET_DC;
4967 
4968 	if (reset_mask & RADEON_RESET_RLC)
4969 		grbm_soft_reset |= SOFT_RESET_RLC;
4970 
4971 	if (reset_mask & RADEON_RESET_SEM)
4972 		srbm_soft_reset |= SOFT_RESET_SEM;
4973 
4974 	if (reset_mask & RADEON_RESET_IH)
4975 		srbm_soft_reset |= SOFT_RESET_IH;
4976 
4977 	if (reset_mask & RADEON_RESET_GRBM)
4978 		srbm_soft_reset |= SOFT_RESET_GRBM;
4979 
4980 	if (reset_mask & RADEON_RESET_VMC)
4981 		srbm_soft_reset |= SOFT_RESET_VMC;
4982 
4983 	if (!(rdev->flags & RADEON_IS_IGP)) {
4984 		if (reset_mask & RADEON_RESET_MC)
4985 			srbm_soft_reset |= SOFT_RESET_MC;
4986 	}
4987 
4988 	if (grbm_soft_reset) {
4989 		tmp = RREG32(GRBM_SOFT_RESET);
4990 		tmp |= grbm_soft_reset;
4991 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4992 		WREG32(GRBM_SOFT_RESET, tmp);
4993 		tmp = RREG32(GRBM_SOFT_RESET);
4994 
4995 		udelay(50);
4996 
4997 		tmp &= ~grbm_soft_reset;
4998 		WREG32(GRBM_SOFT_RESET, tmp);
4999 		tmp = RREG32(GRBM_SOFT_RESET);
5000 	}
5001 
5002 	if (srbm_soft_reset) {
5003 		tmp = RREG32(SRBM_SOFT_RESET);
5004 		tmp |= srbm_soft_reset;
5005 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5006 		WREG32(SRBM_SOFT_RESET, tmp);
5007 		tmp = RREG32(SRBM_SOFT_RESET);
5008 
5009 		udelay(50);
5010 
5011 		tmp &= ~srbm_soft_reset;
5012 		WREG32(SRBM_SOFT_RESET, tmp);
5013 		tmp = RREG32(SRBM_SOFT_RESET);
5014 	}
5015 
5016 	/* Wait a little for things to settle down */
5017 	udelay(50);
5018 
5019 	evergreen_mc_resume(rdev, &save);
5020 	udelay(50);
5021 
5022 	cik_print_gpu_status_regs(rdev);
5023 }
5024 
/* GMCON registers saved across a pci config reset on IGPs (see
 * kv_save_regs_for_reset()/kv_restore_regs_for_reset()).
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;	/* GMCON_RENG_EXECUTE */
	u32 gmcon_misc;		/* GMCON_MISC */
	u32 gmcon_misc3;	/* GMCON_MISC3 */
};
5030 
/* Save the GMCON registers into @save before a pci config reset, then
 * mask RENG_EXECUTE_ON_PWR_UP, RENG_EXECUTE_ON_REG_UPDATE and
 * STCTRL_STUTTER_EN in the live registers for the duration of the
 * reset.  Restored by kv_restore_regs_for_reset().
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5042 
5043 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5044 				      struct kv_reset_save_regs *save)
5045 {
5046 	int i;
5047 
5048 	WREG32(GMCON_PGFSM_WRITE, 0);
5049 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5050 
5051 	for (i = 0; i < 5; i++)
5052 		WREG32(GMCON_PGFSM_WRITE, 0);
5053 
5054 	WREG32(GMCON_PGFSM_WRITE, 0);
5055 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5056 
5057 	for (i = 0; i < 5; i++)
5058 		WREG32(GMCON_PGFSM_WRITE, 0);
5059 
5060 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5061 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5062 
5063 	for (i = 0; i < 5; i++)
5064 		WREG32(GMCON_PGFSM_WRITE, 0);
5065 
5066 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5067 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5068 
5069 	for (i = 0; i < 5; i++)
5070 		WREG32(GMCON_PGFSM_WRITE, 0);
5071 
5072 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5073 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5074 
5075 	for (i = 0; i < 5; i++)
5076 		WREG32(GMCON_PGFSM_WRITE, 0);
5077 
5078 	WREG32(GMCON_PGFSM_WRITE, 0);
5079 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5080 
5081 	for (i = 0; i < 5; i++)
5082 		WREG32(GMCON_PGFSM_WRITE, 0);
5083 
5084 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5085 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5086 
5087 	for (i = 0; i < 5; i++)
5088 		WREG32(GMCON_PGFSM_WRITE, 0);
5089 
5090 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5091 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5092 
5093 	for (i = 0; i < 5; i++)
5094 		WREG32(GMCON_PGFSM_WRITE, 0);
5095 
5096 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5097 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5098 
5099 	for (i = 0; i < 5; i++)
5100 		WREG32(GMCON_PGFSM_WRITE, 0);
5101 
5102 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5103 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5104 
5105 	for (i = 0; i < 5; i++)
5106 		WREG32(GMCON_PGFSM_WRITE, 0);
5107 
5108 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5109 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5110 
5111 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5112 	WREG32(GMCON_MISC, save->gmcon_misc);
5113 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5114 }
5115 
/**
 * cik_gpu_pci_config_reset - asic reset via pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Halts the gfx CP, the MEC and both SDMA engines, stops the RLC and
 * the MC, then resets the whole asic through pci config space and
 * waits for it to come back (CONFIG_MEMSIZE readable again).  On IGPs
 * some GMCON registers are saved before and restored after the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_disable_busmaster(rdev->pdev->dev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads as
	 * all-ones while the asic is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5178 
5179 /**
5180  * cik_asic_reset - soft reset GPU
5181  *
5182  * @rdev: radeon_device pointer
5183  *
5184  * Look up which blocks are hung and attempt
5185  * to reset them.
5186  * Returns 0 for success.
5187  */
5188 int cik_asic_reset(struct radeon_device *rdev)
5189 {
5190 	u32 reset_mask;
5191 
5192 	reset_mask = cik_gpu_check_soft_reset(rdev);
5193 
5194 	if (reset_mask)
5195 		r600_set_bios_scratch_engine_hung(rdev, true);
5196 
5197 	/* try soft reset */
5198 	cik_gpu_soft_reset(rdev, reset_mask);
5199 
5200 	reset_mask = cik_gpu_check_soft_reset(rdev);
5201 
5202 	/* try pci config reset */
5203 	if (reset_mask && radeon_hard_reset)
5204 		cik_gpu_pci_config_reset(rdev);
5205 
5206 	reset_mask = cik_gpu_check_soft_reset(rdev);
5207 
5208 	if (!reset_mask)
5209 		r600_set_bios_scratch_engine_hung(rdev, false);
5210 
5211 	return 0;
5212 }
5213 
5214 /**
5215  * cik_gfx_is_lockup - check if the 3D engine is locked up
5216  *
5217  * @rdev: radeon_device pointer
5218  * @ring: radeon_ring structure holding ring information
5219  *
5220  * Check if the 3D engine is locked up (CIK).
5221  * Returns true if the engine is locked, false if not.
5222  */
5223 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5224 {
5225 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5226 
5227 	if (!(reset_mask & (RADEON_RESET_GFX |
5228 			    RADEON_RESET_COMPUTE |
5229 			    RADEON_RESET_CP))) {
5230 		radeon_ring_lockup_update(rdev, ring);
5231 		return false;
5232 	}
5233 	return radeon_ring_test_lockup(rdev, ring);
5234 }
5235 
5236 /* MC */
5237 /**
5238  * cik_mc_program - program the GPU memory controller
5239  *
5240  * @rdev: radeon_device pointer
5241  *
5242  * Set the location of vram, gart, and AGP in the GPU's
5243  * physical address space (CIK).
5244  */
5245 static void cik_mc_program(struct radeon_device *rdev)
5246 {
5247 	struct evergreen_mc_save save;
5248 	u32 tmp;
5249 	int i, j;
5250 
5251 	/* Initialize HDP */
5252 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5253 		WREG32((0x2c14 + j), 0x00000000);
5254 		WREG32((0x2c18 + j), 0x00000000);
5255 		WREG32((0x2c1c + j), 0x00000000);
5256 		WREG32((0x2c20 + j), 0x00000000);
5257 		WREG32((0x2c24 + j), 0x00000000);
5258 	}
5259 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5260 
5261 	evergreen_mc_stop(rdev, &save);
5262 	if (radeon_mc_wait_for_idle(rdev)) {
5263 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5264 	}
5265 	/* Lockout access through VGA aperture*/
5266 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5267 	/* Update configuration */
5268 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5269 	       rdev->mc.vram_start >> 12);
5270 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5271 	       rdev->mc.vram_end >> 12);
5272 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5273 	       rdev->vram_scratch.gpu_addr >> 12);
5274 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5275 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5276 	WREG32(MC_VM_FB_LOCATION, tmp);
5277 	/* XXX double check these! */
5278 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5279 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5280 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5281 	WREG32(MC_VM_AGP_BASE, 0);
5282 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5283 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5284 	if (radeon_mc_wait_for_idle(rdev)) {
5285 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5286 	}
5287 	evergreen_mc_resume(rdev, &save);
5288 	/* we need to own VRAM, so turn off the VGA renderer here
5289 	 * to stop it overwriting our objects */
5290 	rv515_vga_render_disable(rdev);
5291 }
5292 
5293 /**
5294  * cik_mc_init - initialize the memory controller driver params
5295  *
5296  * @rdev: radeon_device pointer
5297  *
5298  * Look up the amount of vram, vram width, and decide how to place
5299  * vram and gart within the GPU's physical address space (CIK).
5300  * Returns 0 for success.
5301  */
5302 static int cik_mc_init(struct radeon_device *rdev)
5303 {
5304 	u32 tmp;
5305 	int chansize, numchan;
5306 
5307 	/* Get VRAM informations */
5308 	rdev->mc.vram_is_ddr = true;
5309 	tmp = RREG32(MC_ARB_RAMCFG);
5310 	if (tmp & CHANSIZE_MASK) {
5311 		chansize = 64;
5312 	} else {
5313 		chansize = 32;
5314 	}
5315 	tmp = RREG32(MC_SHARED_CHMAP);
5316 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5317 	case 0:
5318 	default:
5319 		numchan = 1;
5320 		break;
5321 	case 1:
5322 		numchan = 2;
5323 		break;
5324 	case 2:
5325 		numchan = 4;
5326 		break;
5327 	case 3:
5328 		numchan = 8;
5329 		break;
5330 	case 4:
5331 		numchan = 3;
5332 		break;
5333 	case 5:
5334 		numchan = 6;
5335 		break;
5336 	case 6:
5337 		numchan = 10;
5338 		break;
5339 	case 7:
5340 		numchan = 12;
5341 		break;
5342 	case 8:
5343 		numchan = 16;
5344 		break;
5345 	}
5346 	rdev->mc.vram_width = numchan * chansize;
5347 	/* Could aper size report 0 ? */
5348 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5349 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5350 	/* size in MB on si */
5351 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5352 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5353 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5354 	si_vram_gtt_location(rdev, &rdev->mc);
5355 	radeon_update_bandwidth_info(rdev);
5356 
5357 	return 0;
5358 }
5359 
5360 /*
5361  * GART
5362  * VMID 0 is the physical GPU addresses as used by the kernel.
5363  * VMIDs 1-15 are used for userspace clients and are handled
5364  * by the radeon vm/hsa code.
5365  */
5366 /**
5367  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5368  *
5369  * @rdev: radeon_device pointer
5370  *
5371  * Flush the TLB for the VMID 0 page table (CIK).
5372  */
5373 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5374 {
5375 	/* flush hdp cache */
5376 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5377 
5378 	/* bits 0-15 are the VM contexts0-15 */
5379 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5380 }
5381 
5382 /**
5383  * cik_pcie_gart_enable - gart enable
5384  *
5385  * @rdev: radeon_device pointer
5386  *
5387  * This sets up the TLBs, programs the page tables for VMID0,
5388  * sets up the hw for VMIDs 1-15 which are allocated on
5389  * demand, and sets up the global locations for the LDS, GDS,
5390  * and GPUVM for FSA64 clients (CIK).
5391  * Returns 0 for success, errors for failure.
5392  */
5393 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5394 {
5395 	int r, i;
5396 
5397 	if (rdev->gart.robj == NULL) {
5398 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5399 		return -EINVAL;
5400 	}
5401 	r = radeon_gart_table_vram_pin(rdev);
5402 	if (r)
5403 		return r;
5404 	/* Setup TLB control */
5405 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5406 	       (0xA << 7) |
5407 	       ENABLE_L1_TLB |
5408 	       ENABLE_L1_FRAGMENT_PROCESSING |
5409 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5410 	       ENABLE_ADVANCED_DRIVER_MODEL |
5411 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5412 	/* Setup L2 cache */
5413 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5414 	       ENABLE_L2_FRAGMENT_PROCESSING |
5415 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5416 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5417 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5418 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5419 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5420 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5421 	       BANK_SELECT(4) |
5422 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5423 	/* setup context0 */
5424 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5425 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5426 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5427 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5428 			(u32)(rdev->dummy_page.addr >> 12));
5429 	WREG32(VM_CONTEXT0_CNTL2, 0);
5430 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5431 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5432 
5433 	WREG32(0x15D4, 0);
5434 	WREG32(0x15D8, 0);
5435 	WREG32(0x15DC, 0);
5436 
5437 	/* restore context1-15 */
5438 	/* set vm size, must be a multiple of 4 */
5439 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5440 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5441 	for (i = 1; i < 16; i++) {
5442 		if (i < 8)
5443 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5444 			       rdev->vm_manager.saved_table_addr[i]);
5445 		else
5446 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5447 			       rdev->vm_manager.saved_table_addr[i]);
5448 	}
5449 
5450 	/* enable context1-15 */
5451 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5452 	       (u32)(rdev->dummy_page.addr >> 12));
5453 	WREG32(VM_CONTEXT1_CNTL2, 4);
5454 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5455 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5456 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5457 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5458 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5459 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5460 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5461 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5462 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5463 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5464 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5465 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5466 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5467 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5468 
5469 	if (rdev->family == CHIP_KAVERI) {
5470 		u32 tmp = RREG32(CHUB_CONTROL);
5471 		tmp &= ~BYPASS_VM;
5472 		WREG32(CHUB_CONTROL, tmp);
5473 	}
5474 
5475 	/* XXX SH_MEM regs */
5476 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5477 	spin_lock(&rdev->srbm_mutex);
5478 	for (i = 0; i < 16; i++) {
5479 		cik_srbm_select(rdev, 0, 0, 0, i);
5480 		/* CP and shaders */
5481 		WREG32(SH_MEM_CONFIG, 0);
5482 		WREG32(SH_MEM_APE1_BASE, 1);
5483 		WREG32(SH_MEM_APE1_LIMIT, 0);
5484 		WREG32(SH_MEM_BASES, 0);
5485 		/* SDMA GFX */
5486 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5487 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5488 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5489 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5490 		/* XXX SDMA RLC - todo */
5491 	}
5492 	cik_srbm_select(rdev, 0, 0, 0, 0);
5493 	spin_unlock(&rdev->srbm_mutex);
5494 
5495 	cik_pcie_gart_tlb_flush(rdev);
5496 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5497 		 (unsigned)(rdev->mc.gtt_size >> 20),
5498 		 (unsigned long long)rdev->gart.table_addr);
5499 	rdev->gart.ready = true;
5500 	return 0;
5501 }
5502 
5503 /**
5504  * cik_pcie_gart_disable - gart disable
5505  *
5506  * @rdev: radeon_device pointer
5507  *
5508  * This disables all VM page table (CIK).
5509  */
5510 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5511 {
5512 	unsigned i;
5513 
5514 	for (i = 1; i < 16; ++i) {
5515 		uint32_t reg;
5516 		if (i < 8)
5517 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5518 		else
5519 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5520 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5521 	}
5522 
5523 	/* Disable all tables */
5524 	WREG32(VM_CONTEXT0_CNTL, 0);
5525 	WREG32(VM_CONTEXT1_CNTL, 0);
5526 	/* Setup TLB control */
5527 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5528 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5529 	/* Setup L2 cache */
5530 	WREG32(VM_L2_CNTL,
5531 	       ENABLE_L2_FRAGMENT_PROCESSING |
5532 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5533 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5534 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5535 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5536 	WREG32(VM_L2_CNTL2, 0);
5537 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5538 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5539 	radeon_gart_table_vram_unpin(rdev);
5540 }
5541 
5542 /**
5543  * cik_pcie_gart_fini - vm fini callback
5544  *
5545  * @rdev: radeon_device pointer
5546  *
5547  * Tears down the driver GART/VM setup (CIK).
5548  */
5549 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5550 {
5551 	cik_pcie_gart_disable(rdev);
5552 	radeon_gart_table_vram_free(rdev);
5553 	radeon_gart_fini(rdev);
5554 }
5555 
5556 /* vm parser */
5557 /**
5558  * cik_ib_parse - vm ib_parse callback
5559  *
5560  * @rdev: radeon_device pointer
5561  * @ib: indirect buffer pointer
5562  *
5563  * CIK uses hw IB checking so this is a nop (CIK).
5564  */
5565 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5566 {
5567 	return 0;
5568 }
5569 
5570 /*
5571  * vm
5572  * VMID 0 is the physical GPU addresses as used by the kernel.
5573  * VMIDs 1-15 are used for userspace clients and are handled
5574  * by the radeon vm/hsa code.
5575  */
5576 /**
5577  * cik_vm_init - cik vm init callback
5578  *
5579  * @rdev: radeon_device pointer
5580  *
5581  * Inits cik specific vm parameters (number of VMs, base of vram for
5582  * VMIDs 1-15) (CIK).
5583  * Returns 0 for success.
5584  */
5585 int cik_vm_init(struct radeon_device *rdev)
5586 {
5587 	/* number of VMs */
5588 	rdev->vm_manager.nvm = 16;
5589 	/* base offset of vram pages */
5590 	if (rdev->flags & RADEON_IS_IGP) {
5591 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5592 		tmp <<= 22;
5593 		rdev->vm_manager.vram_base_offset = tmp;
5594 	} else
5595 		rdev->vm_manager.vram_base_offset = 0;
5596 
5597 	return 0;
5598 }
5599 
5600 /**
5601  * cik_vm_fini - cik vm fini callback
5602  *
5603  * @rdev: radeon_device pointer
5604  *
5605  * Tear down any asic specific VM setup (CIK).
5606  */
5607 void cik_vm_fini(struct radeon_device *rdev)
5608 {
5609 }
5610 
5611 /**
5612  * cik_vm_decode_fault - print human readable fault info
5613  *
5614  * @rdev: radeon_device pointer
5615  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5616  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5617  *
5618  * Print human readable fault information (CIK).
5619  */
5620 static void cik_vm_decode_fault(struct radeon_device *rdev,
5621 				u32 status, u32 addr, u32 mc_client)
5622 {
5623 	u32 mc_id;
5624 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5625 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5626 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5627 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5628 
5629 	if (rdev->family == CHIP_HAWAII)
5630 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5631 	else
5632 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5633 
5634 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5635 	       protections, vmid, addr,
5636 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5637 	       block, mc_client, mc_id);
5638 }
5639 
5640 /**
5641  * cik_vm_flush - cik vm flush using the CP
5642  *
5643  * @rdev: radeon_device pointer
5644  *
5645  * Update the page table base and flush the VM TLB
5646  * using the CP (CIK).
5647  */
5648 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5649 {
5650 	struct radeon_ring *ring = &rdev->ring[ridx];
5651 	int usepfp = (ridx == RADEON_RING_TYPE_GFX_INDEX);
5652 
5653 	if (vm == NULL)
5654 		return;
5655 
5656 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5657 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5658 				 WRITE_DATA_DST_SEL(0)));
5659 	if (vm->id < 8) {
5660 		radeon_ring_write(ring,
5661 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5662 	} else {
5663 		radeon_ring_write(ring,
5664 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5665 	}
5666 	radeon_ring_write(ring, 0);
5667 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5668 
5669 	/* update SH_MEM_* regs */
5670 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5671 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5672 				 WRITE_DATA_DST_SEL(0)));
5673 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5674 	radeon_ring_write(ring, 0);
5675 	radeon_ring_write(ring, VMID(vm->id));
5676 
5677 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5678 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5679 				 WRITE_DATA_DST_SEL(0)));
5680 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5681 	radeon_ring_write(ring, 0);
5682 
5683 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5684 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5685 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5686 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5687 
5688 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5689 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5690 				 WRITE_DATA_DST_SEL(0)));
5691 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5692 	radeon_ring_write(ring, 0);
5693 	radeon_ring_write(ring, VMID(0));
5694 
5695 	/* HDP flush */
5696 	cik_hdp_flush_cp_ring_emit(rdev, ridx);
5697 
5698 	/* bits 0-15 are the VM contexts0-15 */
5699 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5700 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5701 				 WRITE_DATA_DST_SEL(0)));
5702 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5703 	radeon_ring_write(ring, 0);
5704 	radeon_ring_write(ring, 1 << vm->id);
5705 
5706 	/* compute doesn't have PFP */
5707 	if (usepfp) {
5708 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5709 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5710 		radeon_ring_write(ring, 0x0);
5711 	}
5712 }
5713 
5714 /*
5715  * RLC
5716  * The RLC is a multi-purpose microengine that handles a
5717  * variety of functions, the most important of which is
5718  * the interrupt controller.
5719  */
5720 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5721 					  bool enable)
5722 {
5723 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5724 
5725 	if (enable)
5726 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5727 	else
5728 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5729 	WREG32(CP_INT_CNTL_RING0, tmp);
5730 }
5731 
5732 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5733 {
5734 	u32 tmp;
5735 
5736 	tmp = RREG32(RLC_LB_CNTL);
5737 	if (enable)
5738 		tmp |= LOAD_BALANCE_ENABLE;
5739 	else
5740 		tmp &= ~LOAD_BALANCE_ENABLE;
5741 	WREG32(RLC_LB_CNTL, tmp);
5742 }
5743 
/* Wait for the RLC serdes CU masters (per SE/SH) and then the non-CU
 * masters to report idle; each poll is bounded by usec_timeout.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* poll each SE/SH combination for CU-master idle */
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* re-select all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* then wait for the non-CU masters (SE/GC/TC0/TC1) */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5768 
5769 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5770 {
5771 	u32 tmp;
5772 
5773 	tmp = RREG32(RLC_CNTL);
5774 	if (tmp != rlc)
5775 		WREG32(RLC_CNTL, rlc);
5776 }
5777 
/* Disable the RLC and wait for it to go idle.  Returns the previous
 * RLC_CNTL value so the caller can restore it via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		/* stop the RLC micro-engine */
		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait (bounded) for the RLC GPM to go idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	/* previous RLC_CNTL, for later restore */
	return orig;
}
5801 
/* Request RLC safe mode and wait for the RLC to acknowledge. */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* post the enter-safe-mode request to the RLC */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait until both GFX power and clock status bits are set */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to clear REQ (request acknowledged) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5822 
5823 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5824 {
5825 	u32 tmp;
5826 
5827 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5828 	WREG32(RLC_GPR_REG2, tmp);
5829 }
5830 
5831 /**
5832  * cik_rlc_stop - stop the RLC ME
5833  *
5834  * @rdev: radeon_device pointer
5835  *
5836  * Halt the RLC ME (MicroEngine) (CIK).
5837  */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	/* halt the RLC micro-engine */
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	/* wait for all serdes masters to go idle before returning */
	cik_wait_for_rlc_serdes(rdev);
}
5846 
5847 /**
5848  * cik_rlc_start - start the RLC ME
5849  *
5850  * @rdev: radeon_device pointer
5851  *
5852  * Unhalt the RLC ME (MicroEngine) (CIK).
5853  */
static void cik_rlc_start(struct radeon_device *rdev)
{
	/* unhalt the RLC micro-engine */
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* give the RLC some time to start up */
	udelay(50);
}
5862 
5863 /**
5864  * cik_rlc_resume - setup the RLC hw
5865  *
5866  * @rdev: radeon_device pointer
5867  *
5868  * Initialize the RLC registers, load the ucode,
5869  * and start the RLC (CIK).
5870  * Returns 0 for success, -EINVAL if the ucode is not available.
5871  */
5872 static int cik_rlc_resume(struct radeon_device *rdev)
5873 {
5874 	u32 i, size, tmp;
5875 	const __be32 *fw_data;
5876 
5877 	if (!rdev->rlc_fw)
5878 		return -EINVAL;
5879 
5880 	switch (rdev->family) {
5881 	case CHIP_BONAIRE:
5882 	case CHIP_HAWAII:
5883 	default:
5884 		size = BONAIRE_RLC_UCODE_SIZE;
5885 		break;
5886 	case CHIP_KAVERI:
5887 		size = KV_RLC_UCODE_SIZE;
5888 		break;
5889 	case CHIP_KABINI:
5890 		size = KB_RLC_UCODE_SIZE;
5891 		break;
5892 	case CHIP_MULLINS:
5893 		size = ML_RLC_UCODE_SIZE;
5894 		break;
5895 	}
5896 
5897 	cik_rlc_stop(rdev);
5898 
5899 	/* disable CG */
5900 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5901 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5902 
5903 	si_rlc_reset(rdev);
5904 
5905 	cik_init_pg(rdev);
5906 
5907 	cik_init_cg(rdev);
5908 
5909 	WREG32(RLC_LB_CNTR_INIT, 0);
5910 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5911 
5912 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5913 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5914 	WREG32(RLC_LB_PARAMS, 0x00600408);
5915 	WREG32(RLC_LB_CNTL, 0x80000004);
5916 
5917 	WREG32(RLC_MC_CNTL, 0);
5918 	WREG32(RLC_UCODE_CNTL, 0);
5919 
5920 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5921 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5922 	for (i = 0; i < size; i++)
5923 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5924 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5925 
5926 	/* XXX - find out what chips support lbpw */
5927 	cik_enable_lbpw(rdev, false);
5928 
5929 	if (rdev->family == CHIP_BONAIRE)
5930 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5931 
5932 	cik_rlc_start(rdev);
5933 
5934 	return 0;
5935 }
5936 
/* Enable/disable GFX coarse-grain clock gating (CGCG/CGLS). */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* program the serdes with the RLC halted, then restore it */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): results of these four back-to-back reads are
		 * discarded; presumably the reads themselves matter (settle or
		 * posting workaround) - confirm before cleaning up.
		 */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only write back if something changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5972 
/* Enable/disable GFX medium-grain clock gating and related light sleep. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* program the serdes with the RLC halted, then restore it */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			/* configure CGTS_SM_CTRL: SM mode, monitor add, overrides */
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set the MGCG override bit */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC and CP memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* force the CGTS overrides on */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* program the serdes with the RLC halted, then restore it */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
6051 
/* MC/VM registers whose MC_CG_ENABLE / MC_LS_ENABLE bits are toggled by
 * cik_enable_mc_mgcg() and cik_enable_mc_ls().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6064 
6065 static void cik_enable_mc_ls(struct radeon_device *rdev,
6066 			     bool enable)
6067 {
6068 	int i;
6069 	u32 orig, data;
6070 
6071 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6072 		orig = data = RREG32(mc_cg_registers[i]);
6073 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6074 			data |= MC_LS_ENABLE;
6075 		else
6076 			data &= ~MC_LS_ENABLE;
6077 		if (data != orig)
6078 			WREG32(mc_cg_registers[i], data);
6079 	}
6080 }
6081 
6082 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6083 			       bool enable)
6084 {
6085 	int i;
6086 	u32 orig, data;
6087 
6088 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6089 		orig = data = RREG32(mc_cg_registers[i]);
6090 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6091 			data |= MC_CG_ENABLE;
6092 		else
6093 			data &= ~MC_CG_ENABLE;
6094 		if (data != orig)
6095 			WREG32(mc_cg_registers[i], data);
6096 	}
6097 }
6098 
6099 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6100 				 bool enable)
6101 {
6102 	u32 orig, data;
6103 
6104 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6105 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6106 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6107 	} else {
6108 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6109 		data |= 0xff000000;
6110 		if (data != orig)
6111 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6112 
6113 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6114 		data |= 0xff000000;
6115 		if (data != orig)
6116 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6117 	}
6118 }
6119 
6120 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6121 				 bool enable)
6122 {
6123 	u32 orig, data;
6124 
6125 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6126 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6127 		data |= 0x100;
6128 		if (orig != data)
6129 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6130 
6131 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6132 		data |= 0x100;
6133 		if (orig != data)
6134 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6135 	} else {
6136 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6137 		data &= ~0x100;
6138 		if (orig != data)
6139 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6140 
6141 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6142 		data &= ~0x100;
6143 		if (orig != data)
6144 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6145 	}
6146 }
6147 
/* Enable/disable UVD medium-grain clock gating. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the value read here is immediately overwritten
		 * with 0xfff; the read may only matter for its side effects -
		 * confirm before cleaning this up.
		 */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		/* set the DCM bit in the UVD CGC control */
		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the memory gating bits and the DCM bit */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6173 
6174 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6175 			       bool enable)
6176 {
6177 	u32 orig, data;
6178 
6179 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6180 
6181 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6182 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6183 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6184 	else
6185 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6186 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6187 
6188 	if (orig != data)
6189 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6190 }
6191 
6192 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6193 				bool enable)
6194 {
6195 	u32 orig, data;
6196 
6197 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6198 
6199 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6200 		data &= ~CLOCK_GATING_DIS;
6201 	else
6202 		data |= CLOCK_GATING_DIS;
6203 
6204 	if (orig != data)
6205 		WREG32(HDP_HOST_PATH_CNTL, data);
6206 }
6207 
6208 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6209 			      bool enable)
6210 {
6211 	u32 orig, data;
6212 
6213 	orig = data = RREG32(HDP_MEM_POWER_LS);
6214 
6215 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6216 		data |= HDP_LS_ENABLE;
6217 	else
6218 		data &= ~HDP_LS_ENABLE;
6219 
6220 	if (orig != data)
6221 		WREG32(HDP_MEM_POWER_LS, data);
6222 }
6223 
/* Enable or disable clock gating for the IP blocks selected in @block. */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		/* keep the GUI idle interrupt off while reprogramming GFX CG */
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC CG is only programmed on dGPUs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6271 
/* Enable clock gating for all supported blocks: GFX first, then the rest. */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6286 
/* Disable clock gating in the reverse order of cik_init_cg(): GFX last. */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6297 
6298 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6299 					  bool enable)
6300 {
6301 	u32 data, orig;
6302 
6303 	orig = data = RREG32(RLC_PG_CNTL);
6304 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6305 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6306 	else
6307 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6308 	if (orig != data)
6309 		WREG32(RLC_PG_CNTL, data);
6310 }
6311 
6312 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6313 					  bool enable)
6314 {
6315 	u32 data, orig;
6316 
6317 	orig = data = RREG32(RLC_PG_CNTL);
6318 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6319 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6320 	else
6321 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6322 	if (orig != data)
6323 		WREG32(RLC_PG_CNTL, data);
6324 }
6325 
6326 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6327 {
6328 	u32 data, orig;
6329 
6330 	orig = data = RREG32(RLC_PG_CNTL);
6331 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6332 		data &= ~DISABLE_CP_PG;
6333 	else
6334 		data |= DISABLE_CP_PG;
6335 	if (orig != data)
6336 		WREG32(RLC_PG_CNTL, data);
6337 }
6338 
6339 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6340 {
6341 	u32 data, orig;
6342 
6343 	orig = data = RREG32(RLC_PG_CNTL);
6344 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6345 		data &= ~DISABLE_GDS_PG;
6346 	else
6347 		data |= DISABLE_GDS_PG;
6348 	if (orig != data)
6349 		WREG32(RLC_PG_CNTL, data);
6350 }
6351 
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/* Copy the per-micro-engine PG tables out of the CP ucode images (CE, PFP,
 * ME, then MEC(s)) into the RLC cp_table buffer; Kaveri has one extra MEC.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset;

	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		/* pick the ucode image and table offset for this engine */
		if (me == 0) {
			fw_data = (const __be32 *)rdev->ce_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 1) {
			fw_data = (const __be32 *)rdev->pfp_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else if (me == 2) {
			fw_data = (const __be32 *)rdev->me_fw->data;
			table_offset = CP_ME_TABLE_OFFSET;
		} else {
			fw_data = (const __be32 *)rdev->mec_fw->data;
			table_offset = CP_MEC_TABLE_OFFSET;
		}

		/* ucode words are big-endian; store them little-endian */
		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
			dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
		}
		bo_offset += CP_ME_TABLE_SIZE;
	}
}
6393 
/* Enable/disable GFX power gating plus automatic power gating. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): read result is discarded; presumably the read
		 * itself is required here - confirm before removing.
		 */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6423 
6424 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6425 {
6426 	u32 mask = 0, tmp, tmp1;
6427 	int i;
6428 
6429 	cik_select_se_sh(rdev, se, sh);
6430 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6431 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6432 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6433 
6434 	tmp &= 0xffff0000;
6435 
6436 	tmp |= tmp1;
6437 	tmp >>= 16;
6438 
6439 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6440 		mask <<= 1;
6441 		mask |= 1;
6442 	}
6443 
6444 	return (~tmp) & mask;
6445 }
6446 
6447 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6448 {
6449 	u32 i, j, k, active_cu_number = 0;
6450 	u32 mask, counter, cu_bitmap;
6451 	u32 tmp = 0;
6452 
6453 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6454 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6455 			mask = 1;
6456 			cu_bitmap = 0;
6457 			counter = 0;
6458 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6459 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6460 					if (counter < 2)
6461 						cu_bitmap |= mask;
6462 					counter ++;
6463 				}
6464 				mask <<= 1;
6465 			}
6466 
6467 			active_cu_number += counter;
6468 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6469 		}
6470 	}
6471 
6472 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6473 
6474 	tmp = RREG32(RLC_MAX_PG_CU);
6475 	tmp &= ~MAX_PU_CU_MASK;
6476 	tmp |= MAX_PU_CU(active_cu_number);
6477 	WREG32(RLC_MAX_PG_CU, tmp);
6478 }
6479 
6480 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6481 				       bool enable)
6482 {
6483 	u32 data, orig;
6484 
6485 	orig = data = RREG32(RLC_PG_CNTL);
6486 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6487 		data |= STATIC_PER_CU_PG_ENABLE;
6488 	else
6489 		data &= ~STATIC_PER_CU_PG_ENABLE;
6490 	if (orig != data)
6491 		WREG32(RLC_PG_CNTL, data);
6492 }
6493 
6494 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6495 					bool enable)
6496 {
6497 	u32 data, orig;
6498 
6499 	orig = data = RREG32(RLC_PG_CNTL);
6500 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6501 		data |= DYN_PER_CU_PG_ENABLE;
6502 	else
6503 		data &= ~DYN_PER_CU_PG_ENABLE;
6504 	if (orig != data)
6505 		WREG32(RLC_PG_CNTL, data);
6506 }
6507 
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/* Program the RLC scratch area with the clear-state descriptor and the
 * save/restore register list, then set up the PG base addresses and
 * timing/delay registers.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: gpu address hi/lo + size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state: zero the three descriptor words */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* PG buffer base addresses, 256-byte aligned */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6559 
/* Toggle GFX coarse-grain PG plus static and dynamic per-CU MGPG together. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6566 
6567 u32 cik_get_csb_size(struct radeon_device *rdev)
6568 {
6569 	u32 count = 0;
6570 	const struct cs_section_def *sect = NULL;
6571 	const struct cs_extent_def *ext = NULL;
6572 
6573 	if (rdev->rlc.cs_data == NULL)
6574 		return 0;
6575 
6576 	/* begin clear state */
6577 	count += 2;
6578 	/* context control state */
6579 	count += 3;
6580 
6581 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6582 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6583 			if (sect->id == SECT_CONTEXT)
6584 				count += 2 + ext->reg_count;
6585 			else
6586 				return 0;
6587 		}
6588 	}
6589 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6590 	count += 4;
6591 	/* end clear state */
6592 	count += 2;
6593 	/* clear state */
6594 	count += 2;
6595 
6596 	return count;
6597 }
6598 
/* Emit the clear-state buffer into @buffer; the dword layout must stay in
 * sync with the count computed by cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per extent */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only context sections are supported */
				return;
			}
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1, per-asic values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6663 
/* Enable all supported power-gating features. */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			/* GFX PG needs the RLC scratch/PG tables set up first */
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6678 
/* Disable power gating, roughly in the reverse order of cik_init_pg(). */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6689 
6690 /*
6691  * Interrupts
6692  * Starting with r6xx, interrupts are handled via a ring buffer.
6693  * Ring buffers are areas of GPU accessible memory that the GPU
6694  * writes interrupt vectors into and the host reads vectors out of.
6695  * There is a rptr (read pointer) that determines where the
6696  * host is currently reading, and a wptr (write pointer)
6697  * which determines where the GPU has written.  When the
6698  * pointers are equal, the ring is idle.  When the GPU
6699  * writes vectors to the ring buffer, it increments the
6700  * wptr.  When there is an interrupt, the host then starts
6701  * fetching commands and processing them until the pointers are
6702  * equal again at which point it updates the rptr.
6703  */
6704 
6705 /**
6706  * cik_enable_interrupts - Enable the interrupt ring buffer
6707  *
6708  * @rdev: radeon_device pointer
6709  *
6710  * Enable the interrupt ring buffer (CIK).
6711  */
6712 static void cik_enable_interrupts(struct radeon_device *rdev)
6713 {
6714 	u32 ih_cntl = RREG32(IH_CNTL);
6715 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6716 
6717 	ih_cntl |= ENABLE_INTR;
6718 	ih_rb_cntl |= IH_RB_ENABLE;
6719 	WREG32(IH_CNTL, ih_cntl);
6720 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6721 	rdev->ih.enabled = true;
6722 }
6723 
6724 /**
6725  * cik_disable_interrupts - Disable the interrupt ring buffer
6726  *
6727  * @rdev: radeon_device pointer
6728  *
6729  * Disable the interrupt ring buffer (CIK).
6730  */
6731 static void cik_disable_interrupts(struct radeon_device *rdev)
6732 {
6733 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6734 	u32 ih_cntl = RREG32(IH_CNTL);
6735 
6736 	ih_rb_cntl &= ~IH_RB_ENABLE;
6737 	ih_cntl &= ~ENABLE_INTR;
6738 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6739 	WREG32(IH_CNTL, ih_cntl);
6740 	/* set rptr, wptr to 0 */
6741 	WREG32(IH_RB_RPTR, 0);
6742 	WREG32(IH_RB_WPTR, 0);
6743 	rdev->ih.enabled = false;
6744 	rdev->ih.rptr = 0;
6745 }
6746 
6747 /**
6748  * cik_disable_interrupt_state - Disable all interrupt sources
6749  *
6750  * @rdev: radeon_device pointer
6751  *
6752  * Clear all interrupt enable bits used by the driver (CIK).
6753  */
6754 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6755 {
6756 	u32 tmp;
6757 
6758 	/* gfx ring */
6759 	tmp = RREG32(CP_INT_CNTL_RING0) &
6760 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6761 	WREG32(CP_INT_CNTL_RING0, tmp);
6762 	/* sdma */
6763 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6764 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6765 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6766 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6767 	/* compute queues */
6768 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6769 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6770 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6771 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6772 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6773 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6774 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6775 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6776 	/* grbm */
6777 	WREG32(GRBM_INT_CNTL, 0);
6778 	/* vline/vblank, etc. */
6779 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6780 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6781 	if (rdev->num_crtc >= 4) {
6782 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6783 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6784 	}
6785 	if (rdev->num_crtc >= 6) {
6786 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6787 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6788 	}
6789 	/* pflip */
6790 	if (rdev->num_crtc >= 2) {
6791 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6792 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6793 	}
6794 	if (rdev->num_crtc >= 4) {
6795 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6796 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6797 	}
6798 	if (rdev->num_crtc >= 6) {
6799 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6800 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6801 	}
6802 
6803 	/* dac hotplug */
6804 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6805 
6806 	/* digital hotplug */
6807 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6808 	WREG32(DC_HPD1_INT_CONTROL, tmp);
6809 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6810 	WREG32(DC_HPD2_INT_CONTROL, tmp);
6811 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6812 	WREG32(DC_HPD3_INT_CONTROL, tmp);
6813 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6814 	WREG32(DC_HPD4_INT_CONTROL, tmp);
6815 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6816 	WREG32(DC_HPD5_INT_CONTROL, tmp);
6817 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6818 	WREG32(DC_HPD6_INT_CONTROL, tmp);
6819 
6820 }
6821 
6822 /**
6823  * cik_irq_init - init and enable the interrupt ring
6824  *
6825  * @rdev: radeon_device pointer
6826  *
6827  * Allocate a ring buffer for the interrupt controller,
6828  * enable the RLC, disable interrupts, enable the IH
6829  * ring buffer and enable it (CIK).
6830  * Called at device load and reume.
6831  * Returns 0 for success, errors for failure.
6832  */
6833 static int cik_irq_init(struct radeon_device *rdev)
6834 {
6835 	int ret = 0;
6836 	int rb_bufsz;
6837 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6838 
6839 	/* allocate ring */
6840 	ret = r600_ih_ring_alloc(rdev);
6841 	if (ret)
6842 		return ret;
6843 
6844 	/* disable irqs */
6845 	cik_disable_interrupts(rdev);
6846 
6847 	/* init rlc */
6848 	ret = cik_rlc_resume(rdev);
6849 	if (ret) {
6850 		r600_ih_ring_fini(rdev);
6851 		return ret;
6852 	}
6853 
6854 	/* setup interrupt control */
6855 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
6856 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6857 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6858 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6859 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6860 	 */
6861 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6862 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6863 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6864 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6865 
6866 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6867 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6868 
6869 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6870 		      IH_WPTR_OVERFLOW_CLEAR |
6871 		      (rb_bufsz << 1));
6872 
6873 	if (rdev->wb.enabled)
6874 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6875 
6876 	/* set the writeback address whether it's enabled or not */
6877 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6878 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6879 
6880 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6881 
6882 	/* set rptr, wptr to 0 */
6883 	WREG32(IH_RB_RPTR, 0);
6884 	WREG32(IH_RB_WPTR, 0);
6885 
6886 	/* Default settings for IH_CNTL (disabled at first) */
6887 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6888 	/* RPTR_REARM only works if msi's are enabled */
6889 	if (rdev->msi_enabled)
6890 		ih_cntl |= RPTR_REARM;
6891 	WREG32(IH_CNTL, ih_cntl);
6892 
6893 	/* force the active interrupt state to all disabled */
6894 	cik_disable_interrupt_state(rdev);
6895 
6896 	pci_enable_busmaster(rdev->pdev->dev);
6897 
6898 	/* enable irqs */
6899 	cik_enable_interrupts(rdev);
6900 
6901 	return ret;
6902 }
6903 
6904 /**
6905  * cik_irq_set - enable/disable interrupt sources
6906  *
6907  * @rdev: radeon_device pointer
6908  *
6909  * Enable interrupt sources on the GPU (vblanks, hpd,
6910  * etc.) (CIK).
6911  * Returns 0 for success, errors for failure.
6912  */
6913 int cik_irq_set(struct radeon_device *rdev)
6914 {
6915 	u32 cp_int_cntl;
6916 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6917 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6918 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6919 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6920 	u32 grbm_int_cntl = 0;
6921 	u32 dma_cntl, dma_cntl1;
6922 	u32 thermal_int;
6923 
6924 	if (!rdev->irq.installed) {
6925 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6926 		return -EINVAL;
6927 	}
6928 	/* don't enable anything if the ih is disabled */
6929 	if (!rdev->ih.enabled) {
6930 		cik_disable_interrupts(rdev);
6931 		/* force the active interrupt state to all disabled */
6932 		cik_disable_interrupt_state(rdev);
6933 		return 0;
6934 	}
6935 
6936 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6937 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6938 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6939 
6940 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6941 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6942 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6943 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6944 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6945 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6946 
6947 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6948 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6949 
6950 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6951 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6952 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6953 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6954 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6955 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6956 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6957 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6958 
6959 	if (rdev->flags & RADEON_IS_IGP)
6960 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6961 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6962 	else
6963 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6964 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6965 
6966 	/* enable CP interrupts on all rings */
6967 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6968 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6969 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6970 	}
6971 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6972 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6973 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6974 		if (ring->me == 1) {
6975 			switch (ring->pipe) {
6976 			case 0:
6977 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6978 				break;
6979 			case 1:
6980 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6981 				break;
6982 			case 2:
6983 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6984 				break;
6985 			case 3:
6986 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6987 				break;
6988 			default:
6989 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6990 				break;
6991 			}
6992 		} else if (ring->me == 2) {
6993 			switch (ring->pipe) {
6994 			case 0:
6995 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6996 				break;
6997 			case 1:
6998 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6999 				break;
7000 			case 2:
7001 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7002 				break;
7003 			case 3:
7004 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7005 				break;
7006 			default:
7007 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7008 				break;
7009 			}
7010 		} else {
7011 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7012 		}
7013 	}
7014 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7015 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7016 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7017 		if (ring->me == 1) {
7018 			switch (ring->pipe) {
7019 			case 0:
7020 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7021 				break;
7022 			case 1:
7023 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7024 				break;
7025 			case 2:
7026 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7027 				break;
7028 			case 3:
7029 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7030 				break;
7031 			default:
7032 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7033 				break;
7034 			}
7035 		} else if (ring->me == 2) {
7036 			switch (ring->pipe) {
7037 			case 0:
7038 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7039 				break;
7040 			case 1:
7041 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7042 				break;
7043 			case 2:
7044 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7045 				break;
7046 			case 3:
7047 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7048 				break;
7049 			default:
7050 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7051 				break;
7052 			}
7053 		} else {
7054 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7055 		}
7056 	}
7057 
7058 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7059 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7060 		dma_cntl |= TRAP_ENABLE;
7061 	}
7062 
7063 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7064 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7065 		dma_cntl1 |= TRAP_ENABLE;
7066 	}
7067 
7068 	if (rdev->irq.crtc_vblank_int[0] ||
7069 	    atomic_read(&rdev->irq.pflip[0])) {
7070 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7071 		crtc1 |= VBLANK_INTERRUPT_MASK;
7072 	}
7073 	if (rdev->irq.crtc_vblank_int[1] ||
7074 	    atomic_read(&rdev->irq.pflip[1])) {
7075 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7076 		crtc2 |= VBLANK_INTERRUPT_MASK;
7077 	}
7078 	if (rdev->irq.crtc_vblank_int[2] ||
7079 	    atomic_read(&rdev->irq.pflip[2])) {
7080 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7081 		crtc3 |= VBLANK_INTERRUPT_MASK;
7082 	}
7083 	if (rdev->irq.crtc_vblank_int[3] ||
7084 	    atomic_read(&rdev->irq.pflip[3])) {
7085 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7086 		crtc4 |= VBLANK_INTERRUPT_MASK;
7087 	}
7088 	if (rdev->irq.crtc_vblank_int[4] ||
7089 	    atomic_read(&rdev->irq.pflip[4])) {
7090 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7091 		crtc5 |= VBLANK_INTERRUPT_MASK;
7092 	}
7093 	if (rdev->irq.crtc_vblank_int[5] ||
7094 	    atomic_read(&rdev->irq.pflip[5])) {
7095 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7096 		crtc6 |= VBLANK_INTERRUPT_MASK;
7097 	}
7098 	if (rdev->irq.hpd[0]) {
7099 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7100 		hpd1 |= DC_HPDx_INT_EN;
7101 	}
7102 	if (rdev->irq.hpd[1]) {
7103 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7104 		hpd2 |= DC_HPDx_INT_EN;
7105 	}
7106 	if (rdev->irq.hpd[2]) {
7107 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7108 		hpd3 |= DC_HPDx_INT_EN;
7109 	}
7110 	if (rdev->irq.hpd[3]) {
7111 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7112 		hpd4 |= DC_HPDx_INT_EN;
7113 	}
7114 	if (rdev->irq.hpd[4]) {
7115 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7116 		hpd5 |= DC_HPDx_INT_EN;
7117 	}
7118 	if (rdev->irq.hpd[5]) {
7119 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7120 		hpd6 |= DC_HPDx_INT_EN;
7121 	}
7122 
7123 	if (rdev->irq.dpm_thermal) {
7124 		DRM_DEBUG("dpm thermal\n");
7125 		if (rdev->flags & RADEON_IS_IGP)
7126 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7127 		else
7128 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7129 	}
7130 
7131 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7132 
7133 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7134 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7135 
7136 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7137 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7138 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7139 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7140 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7141 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7142 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7143 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7144 
7145 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7146 
7147 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7148 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7149 	if (rdev->num_crtc >= 4) {
7150 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7151 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7152 	}
7153 	if (rdev->num_crtc >= 6) {
7154 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7155 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7156 	}
7157 
7158 	if (rdev->num_crtc >= 2) {
7159 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7160 		       GRPH_PFLIP_INT_MASK);
7161 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7162 		       GRPH_PFLIP_INT_MASK);
7163 	}
7164 	if (rdev->num_crtc >= 4) {
7165 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7166 		       GRPH_PFLIP_INT_MASK);
7167 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7168 		       GRPH_PFLIP_INT_MASK);
7169 	}
7170 	if (rdev->num_crtc >= 6) {
7171 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7172 		       GRPH_PFLIP_INT_MASK);
7173 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7174 		       GRPH_PFLIP_INT_MASK);
7175 	}
7176 
7177 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7178 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7179 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7180 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7181 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7182 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7183 
7184 	if (rdev->flags & RADEON_IS_IGP)
7185 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7186 	else
7187 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7188 
7189 	return 0;
7190 }
7191 
7192 /**
7193  * cik_irq_ack - ack interrupt sources
7194  *
7195  * @rdev: radeon_device pointer
7196  *
7197  * Ack interrupt sources on the GPU (vblanks, hpd,
7198  * etc.) (CIK).  Certain interrupts sources are sw
7199  * generated and do not require an explicit ack.
7200  */
7201 static inline void cik_irq_ack(struct radeon_device *rdev)
7202 {
7203 	u32 tmp;
7204 
7205 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7206 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7207 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7208 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7209 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7210 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7211 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7212 
7213 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7214 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7215 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7216 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7217 	if (rdev->num_crtc >= 4) {
7218 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7219 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7220 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7221 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7222 	}
7223 	if (rdev->num_crtc >= 6) {
7224 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7225 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7226 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7227 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7228 	}
7229 
7230 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7231 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7232 		       GRPH_PFLIP_INT_CLEAR);
7233 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7234 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7235 		       GRPH_PFLIP_INT_CLEAR);
7236 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7237 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7238 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7239 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7240 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7241 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7242 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7243 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7244 
7245 	if (rdev->num_crtc >= 4) {
7246 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7247 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7248 			       GRPH_PFLIP_INT_CLEAR);
7249 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7250 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7251 			       GRPH_PFLIP_INT_CLEAR);
7252 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7253 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7254 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7255 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7256 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7257 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7258 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7259 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7260 	}
7261 
7262 	if (rdev->num_crtc >= 6) {
7263 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7264 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7265 			       GRPH_PFLIP_INT_CLEAR);
7266 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7267 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7268 			       GRPH_PFLIP_INT_CLEAR);
7269 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7270 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7271 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7272 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7273 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7274 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7275 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7276 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7277 	}
7278 
7279 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7280 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7281 		tmp |= DC_HPDx_INT_ACK;
7282 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7283 	}
7284 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7285 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7286 		tmp |= DC_HPDx_INT_ACK;
7287 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7288 	}
7289 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7290 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7291 		tmp |= DC_HPDx_INT_ACK;
7292 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7293 	}
7294 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7295 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7296 		tmp |= DC_HPDx_INT_ACK;
7297 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7298 	}
7299 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7300 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7301 		tmp |= DC_HPDx_INT_ACK;
7302 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7303 	}
7304 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7305 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7306 		tmp |= DC_HPDx_INT_ACK;
7307 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7308 	}
7309 }
7310 
7311 /**
7312  * cik_irq_disable - disable interrupts
7313  *
7314  * @rdev: radeon_device pointer
7315  *
7316  * Disable interrupts on the hw (CIK).
7317  */
7318 static void cik_irq_disable(struct radeon_device *rdev)
7319 {
7320 	cik_disable_interrupts(rdev);
7321 	/* Wait and acknowledge irq */
7322 	mdelay(1);
7323 	cik_irq_ack(rdev);
7324 	cik_disable_interrupt_state(rdev);
7325 }
7326 
7327 /**
7328  * cik_irq_disable - disable interrupts for suspend
7329  *
7330  * @rdev: radeon_device pointer
7331  *
7332  * Disable interrupts and stop the RLC (CIK).
7333  * Used for suspend.
7334  */
7335 static void cik_irq_suspend(struct radeon_device *rdev)
7336 {
7337 	cik_irq_disable(rdev);
7338 	cik_rlc_stop(rdev);
7339 }
7340 
7341 /**
7342  * cik_irq_fini - tear down interrupt support
7343  *
7344  * @rdev: radeon_device pointer
7345  *
7346  * Disable interrupts on the hw and free the IH ring
7347  * buffer (CIK).
7348  * Used for driver unload.
7349  */
7350 static void cik_irq_fini(struct radeon_device *rdev)
7351 {
7352 	cik_irq_suspend(rdev);
7353 	r600_ih_ring_fini(rdev);
7354 }
7355 
7356 /**
7357  * cik_get_ih_wptr - get the IH ring buffer wptr
7358  *
7359  * @rdev: radeon_device pointer
7360  *
7361  * Get the IH ring buffer wptr from either the register
7362  * or the writeback memory buffer (CIK).  Also check for
7363  * ring buffer overflow and deal with it.
7364  * Used by cik_irq_process().
7365  * Returns the value of the wptr.
7366  */
7367 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7368 {
7369 	u32 wptr, tmp;
7370 
7371 	if (rdev->wb.enabled)
7372 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7373 	else
7374 		wptr = RREG32(IH_RB_WPTR);
7375 
7376 	if (wptr & RB_OVERFLOW) {
7377 		wptr &= ~RB_OVERFLOW;
7378 		/* When a ring buffer overflow happen start parsing interrupt
7379 		 * from the last not overwritten vector (wptr + 16). Hopefully
7380 		 * this should allow us to catchup.
7381 		 */
7382 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7383 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7384 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7385 		tmp = RREG32(IH_RB_CNTL);
7386 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7387 		WREG32(IH_RB_CNTL, tmp);
7388 	}
7389 	return (wptr & rdev->ih.ptr_mask);
7390 }
7391 
7392 /*        CIK IV Ring
7393  * Each IV ring entry is 128 bits:
7394  * [7:0]    - interrupt source id
7395  * [31:8]   - reserved
7396  * [59:32]  - interrupt source data
7397  * [63:60]  - reserved
7398  * [71:64]  - RINGID
7399  *            CP:
7400  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7401  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7402  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7403  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7404  *            PIPE_ID - ME0 0=3D
7405  *                    - ME1&2 compute dispatcher (4 pipes each)
7406  *            SDMA:
7407  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7408  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7409  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7410  * [79:72]  - VMID
7411  * [95:80]  - PASID
7412  * [127:96] - reserved
7413  */
7414 /**
7415  * cik_irq_process - interrupt handler
7416  *
7417  * @rdev: radeon_device pointer
7418  *
7419  * Interrupt hander (CIK).  Walk the IH ring,
7420  * ack interrupts and schedule work to handle
7421  * interrupt events.
7422  * Returns irq process return code.
7423  */
7424 irqreturn_t cik_irq_process(struct radeon_device *rdev)
7425 {
7426 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7427 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7428 	u32 wptr;
7429 	u32 rptr;
7430 	u32 src_id, src_data, ring_id;
7431 	u8 me_id, pipe_id, queue_id;
7432 	u32 ring_index;
7433 	bool queue_hotplug = false;
7434 	bool queue_reset = false;
7435 	u32 addr, status, mc_client;
7436 	bool queue_thermal = false;
7437 
7438 	if (!rdev->ih.enabled || rdev->shutdown)
7439 		return IRQ_NONE;
7440 
7441 	wptr = cik_get_ih_wptr(rdev);
7442 
7443 restart_ih:
7444 	/* is somebody else already processing irqs? */
7445 	if (atomic_xchg(&rdev->ih.lock, 1))
7446 		return IRQ_NONE;
7447 
7448 	rptr = rdev->ih.rptr;
7449 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7450 
7451 	/* Order reading of wptr vs. reading of IH ring data */
7452 	rmb();
7453 
7454 	/* display interrupts */
7455 	cik_irq_ack(rdev);
7456 
7457 	while (rptr != wptr) {
7458 		/* wptr/rptr are in bytes! */
7459 		ring_index = rptr / 4;
7460 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7461 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7462 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7463 
7464 		switch (src_id) {
7465 		case 1: /* D1 vblank/vline */
7466 			switch (src_data) {
7467 			case 0: /* D1 vblank */
7468 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7469 					if (rdev->irq.crtc_vblank_int[0]) {
7470 						drm_handle_vblank(rdev->ddev, 0);
7471 						rdev->pm.vblank_sync = true;
7472 						wake_up(&rdev->irq.vblank_queue);
7473 					}
7474 					if (atomic_read(&rdev->irq.pflip[0]))
7475 						radeon_crtc_handle_vblank(rdev, 0);
7476 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7477 					DRM_DEBUG("IH: D1 vblank\n");
7478 				}
7479 				break;
7480 			case 1: /* D1 vline */
7481 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7482 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7483 					DRM_DEBUG("IH: D1 vline\n");
7484 				}
7485 				break;
7486 			default:
7487 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7488 				break;
7489 			}
7490 			break;
7491 		case 2: /* D2 vblank/vline */
7492 			switch (src_data) {
7493 			case 0: /* D2 vblank */
7494 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7495 					if (rdev->irq.crtc_vblank_int[1]) {
7496 						drm_handle_vblank(rdev->ddev, 1);
7497 						rdev->pm.vblank_sync = true;
7498 						wake_up(&rdev->irq.vblank_queue);
7499 					}
7500 					if (atomic_read(&rdev->irq.pflip[1]))
7501 						radeon_crtc_handle_vblank(rdev, 1);
7502 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7503 					DRM_DEBUG("IH: D2 vblank\n");
7504 				}
7505 				break;
7506 			case 1: /* D2 vline */
7507 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7508 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7509 					DRM_DEBUG("IH: D2 vline\n");
7510 				}
7511 				break;
7512 			default:
7513 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7514 				break;
7515 			}
7516 			break;
7517 		case 3: /* D3 vblank/vline */
7518 			switch (src_data) {
7519 			case 0: /* D3 vblank */
7520 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7521 					if (rdev->irq.crtc_vblank_int[2]) {
7522 						drm_handle_vblank(rdev->ddev, 2);
7523 						rdev->pm.vblank_sync = true;
7524 						wake_up(&rdev->irq.vblank_queue);
7525 					}
7526 					if (atomic_read(&rdev->irq.pflip[2]))
7527 						radeon_crtc_handle_vblank(rdev, 2);
7528 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7529 					DRM_DEBUG("IH: D3 vblank\n");
7530 				}
7531 				break;
7532 			case 1: /* D3 vline */
7533 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7534 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7535 					DRM_DEBUG("IH: D3 vline\n");
7536 				}
7537 				break;
7538 			default:
7539 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7540 				break;
7541 			}
7542 			break;
7543 		case 4: /* D4 vblank/vline */
7544 			switch (src_data) {
7545 			case 0: /* D4 vblank */
7546 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7547 					if (rdev->irq.crtc_vblank_int[3]) {
7548 						drm_handle_vblank(rdev->ddev, 3);
7549 						rdev->pm.vblank_sync = true;
7550 						wake_up(&rdev->irq.vblank_queue);
7551 					}
7552 					if (atomic_read(&rdev->irq.pflip[3]))
7553 						radeon_crtc_handle_vblank(rdev, 3);
7554 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7555 					DRM_DEBUG("IH: D4 vblank\n");
7556 				}
7557 				break;
7558 			case 1: /* D4 vline */
7559 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7560 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7561 					DRM_DEBUG("IH: D4 vline\n");
7562 				}
7563 				break;
7564 			default:
7565 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7566 				break;
7567 			}
7568 			break;
7569 		case 5: /* D5 vblank/vline */
7570 			switch (src_data) {
7571 			case 0: /* D5 vblank */
7572 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7573 					if (rdev->irq.crtc_vblank_int[4]) {
7574 						drm_handle_vblank(rdev->ddev, 4);
7575 						rdev->pm.vblank_sync = true;
7576 						wake_up(&rdev->irq.vblank_queue);
7577 					}
7578 					if (atomic_read(&rdev->irq.pflip[4]))
7579 						radeon_crtc_handle_vblank(rdev, 4);
7580 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7581 					DRM_DEBUG("IH: D5 vblank\n");
7582 				}
7583 				break;
7584 			case 1: /* D5 vline */
7585 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7586 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7587 					DRM_DEBUG("IH: D5 vline\n");
7588 				}
7589 				break;
7590 			default:
7591 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7592 				break;
7593 			}
7594 			break;
7595 		case 6: /* D6 vblank/vline */
7596 			switch (src_data) {
7597 			case 0: /* D6 vblank */
7598 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7599 					if (rdev->irq.crtc_vblank_int[5]) {
7600 						drm_handle_vblank(rdev->ddev, 5);
7601 						rdev->pm.vblank_sync = true;
7602 						wake_up(&rdev->irq.vblank_queue);
7603 					}
7604 					if (atomic_read(&rdev->irq.pflip[5]))
7605 						radeon_crtc_handle_vblank(rdev, 5);
7606 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7607 					DRM_DEBUG("IH: D6 vblank\n");
7608 				}
7609 				break;
7610 			case 1: /* D6 vline */
7611 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7612 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7613 					DRM_DEBUG("IH: D6 vline\n");
7614 				}
7615 				break;
7616 			default:
7617 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7618 				break;
7619 			}
7620 			break;
7621 		case 8: /* D1 page flip */
7622 		case 10: /* D2 page flip */
7623 		case 12: /* D3 page flip */
7624 		case 14: /* D4 page flip */
7625 		case 16: /* D5 page flip */
7626 		case 18: /* D6 page flip */
7627 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7628 			if (radeon_use_pflipirq > 0)
7629 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7630 			break;
7631 		case 42: /* HPD hotplug */
7632 			switch (src_data) {
7633 			case 0:
7634 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7635 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7636 					queue_hotplug = true;
7637 					DRM_DEBUG("IH: HPD1\n");
7638 				}
7639 				break;
7640 			case 1:
7641 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7642 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7643 					queue_hotplug = true;
7644 					DRM_DEBUG("IH: HPD2\n");
7645 				}
7646 				break;
7647 			case 2:
7648 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7649 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7650 					queue_hotplug = true;
7651 					DRM_DEBUG("IH: HPD3\n");
7652 				}
7653 				break;
7654 			case 3:
7655 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7656 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7657 					queue_hotplug = true;
7658 					DRM_DEBUG("IH: HPD4\n");
7659 				}
7660 				break;
7661 			case 4:
7662 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7663 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7664 					queue_hotplug = true;
7665 					DRM_DEBUG("IH: HPD5\n");
7666 				}
7667 				break;
7668 			case 5:
7669 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7670 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7671 					queue_hotplug = true;
7672 					DRM_DEBUG("IH: HPD6\n");
7673 				}
7674 				break;
7675 			default:
7676 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7677 				break;
7678 			}
7679 			break;
7680 		case 124: /* UVD */
7681 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7682 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7683 			break;
7684 		case 146:
7685 		case 147:
7686 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7687 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7688 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7689 			/* reset addr and status */
7690 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7691 			if (addr == 0x0 && status == 0x0)
7692 				break;
7693 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7694 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7695 				addr);
7696 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7697 				status);
7698 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7699 			break;
7700 		case 167: /* VCE */
7701 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7702 			switch (src_data) {
7703 			case 0:
7704 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7705 				break;
7706 			case 1:
7707 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7708 				break;
7709 			default:
7710 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7711 				break;
7712 			}
7713 			break;
7714 		case 176: /* GFX RB CP_INT */
7715 		case 177: /* GFX IB CP_INT */
7716 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7717 			break;
7718 		case 181: /* CP EOP event */
7719 			DRM_DEBUG("IH: CP EOP\n");
7720 			/* XXX check the bitfield order! */
7721 			me_id = (ring_id & 0x60) >> 5;
7722 			pipe_id = (ring_id & 0x18) >> 3;
7723 			queue_id = (ring_id & 0x7) >> 0;
7724 			switch (me_id) {
7725 			case 0:
7726 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7727 				break;
7728 			case 1:
7729 			case 2:
7730 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7731 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7732 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7733 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7734 				break;
7735 			}
7736 			break;
7737 		case 184: /* CP Privileged reg access */
7738 			DRM_ERROR("Illegal register access in command stream\n");
7739 			/* XXX check the bitfield order! */
7740 			me_id = (ring_id & 0x60) >> 5;
7741 			pipe_id = (ring_id & 0x18) >> 3;
7742 			queue_id = (ring_id & 0x7) >> 0;
7743 			switch (me_id) {
7744 			case 0:
7745 				/* This results in a full GPU reset, but all we need to do is soft
7746 				 * reset the CP for gfx
7747 				 */
7748 				queue_reset = true;
7749 				break;
7750 			case 1:
7751 				/* XXX compute */
7752 				queue_reset = true;
7753 				break;
7754 			case 2:
7755 				/* XXX compute */
7756 				queue_reset = true;
7757 				break;
7758 			}
7759 			break;
7760 		case 185: /* CP Privileged inst */
7761 			DRM_ERROR("Illegal instruction in command stream\n");
7762 			/* XXX check the bitfield order! */
7763 			me_id = (ring_id & 0x60) >> 5;
7764 			pipe_id = (ring_id & 0x18) >> 3;
7765 			queue_id = (ring_id & 0x7) >> 0;
7766 			switch (me_id) {
7767 			case 0:
7768 				/* This results in a full GPU reset, but all we need to do is soft
7769 				 * reset the CP for gfx
7770 				 */
7771 				queue_reset = true;
7772 				break;
7773 			case 1:
7774 				/* XXX compute */
7775 				queue_reset = true;
7776 				break;
7777 			case 2:
7778 				/* XXX compute */
7779 				queue_reset = true;
7780 				break;
7781 			}
7782 			break;
7783 		case 224: /* SDMA trap event */
7784 			/* XXX check the bitfield order! */
7785 			me_id = (ring_id & 0x3) >> 0;
7786 			queue_id = (ring_id & 0xc) >> 2;
7787 			DRM_DEBUG("IH: SDMA trap\n");
7788 			switch (me_id) {
7789 			case 0:
7790 				switch (queue_id) {
7791 				case 0:
7792 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7793 					break;
7794 				case 1:
7795 					/* XXX compute */
7796 					break;
7797 				case 2:
7798 					/* XXX compute */
7799 					break;
7800 				}
7801 				break;
7802 			case 1:
7803 				switch (queue_id) {
7804 				case 0:
7805 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7806 					break;
7807 				case 1:
7808 					/* XXX compute */
7809 					break;
7810 				case 2:
7811 					/* XXX compute */
7812 					break;
7813 				}
7814 				break;
7815 			}
7816 			break;
7817 		case 230: /* thermal low to high */
7818 			DRM_DEBUG("IH: thermal low to high\n");
7819 			rdev->pm.dpm.thermal.high_to_low = false;
7820 			queue_thermal = true;
7821 			break;
7822 		case 231: /* thermal high to low */
7823 			DRM_DEBUG("IH: thermal high to low\n");
7824 			rdev->pm.dpm.thermal.high_to_low = true;
7825 			queue_thermal = true;
7826 			break;
7827 		case 233: /* GUI IDLE */
7828 			DRM_DEBUG("IH: GUI idle\n");
7829 			break;
7830 		case 241: /* SDMA Privileged inst */
7831 		case 247: /* SDMA Privileged inst */
7832 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
7833 			/* XXX check the bitfield order! */
7834 			me_id = (ring_id & 0x3) >> 0;
7835 			queue_id = (ring_id & 0xc) >> 2;
7836 			switch (me_id) {
7837 			case 0:
7838 				switch (queue_id) {
7839 				case 0:
7840 					queue_reset = true;
7841 					break;
7842 				case 1:
7843 					/* XXX compute */
7844 					queue_reset = true;
7845 					break;
7846 				case 2:
7847 					/* XXX compute */
7848 					queue_reset = true;
7849 					break;
7850 				}
7851 				break;
7852 			case 1:
7853 				switch (queue_id) {
7854 				case 0:
7855 					queue_reset = true;
7856 					break;
7857 				case 1:
7858 					/* XXX compute */
7859 					queue_reset = true;
7860 					break;
7861 				case 2:
7862 					/* XXX compute */
7863 					queue_reset = true;
7864 					break;
7865 				}
7866 				break;
7867 			}
7868 			break;
7869 		default:
7870 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7871 			break;
7872 		}
7873 
7874 		/* wptr/rptr are in bytes! */
7875 		rptr += 16;
7876 		rptr &= rdev->ih.ptr_mask;
7877 		WREG32(IH_RB_RPTR, rptr);
7878 	}
7879 	if (queue_hotplug)
7880 		taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
7881 	if (queue_reset) {
7882 		rdev->needs_reset = true;
7883 		wake_up_all(&rdev->fence_queue);
7884 	}
7885 	if (queue_thermal)
7886 		taskqueue_enqueue(rdev->tq, &rdev->pm.dpm.thermal.work);
7887 	rdev->ih.rptr = rptr;
7888 	atomic_set(&rdev->ih.lock, 0);
7889 
7890 	/* make sure wptr hasn't changed while processing */
7891 	wptr = cik_get_ih_wptr(rdev);
7892 	if (wptr != rptr)
7893 		goto restart_ih;
7894 
7895 	return IRQ_HANDLED;
7896 }
7897 
7898 /*
7899  * startup/shutdown callbacks
7900  */
7901 /**
7902  * cik_startup - program the asic to a functional state
7903  *
7904  * @rdev: radeon_device pointer
7905  *
7906  * Programs the asic to a functional state (CIK).
7907  * Called by cik_init() and cik_resume().
7908  * Returns 0 for success, error for failure.
7909  */
7910 static int cik_startup(struct radeon_device *rdev)
7911 {
7912 	struct radeon_ring *ring;
7913 	u32 nop;
7914 	int r;
7915 
7916 	/* enable pcie gen2/3 link */
7917 	cik_pcie_gen3_enable(rdev);
7918 	/* enable aspm */
7919 	cik_program_aspm(rdev);
7920 
7921 	/* scratch needs to be initialized before MC */
7922 	r = r600_vram_scratch_init(rdev);
7923 	if (r)
7924 		return r;
7925 
7926 	cik_mc_program(rdev);
7927 
7928 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
7929 		r = ci_mc_load_microcode(rdev);
7930 		if (r) {
7931 			DRM_ERROR("Failed to load MC firmware!\n");
7932 			return r;
7933 		}
7934 	}
7935 
7936 	r = cik_pcie_gart_enable(rdev);
7937 	if (r)
7938 		return r;
7939 	cik_gpu_init(rdev);
7940 
7941 	/* allocate rlc buffers */
7942 	if (rdev->flags & RADEON_IS_IGP) {
7943 		if (rdev->family == CHIP_KAVERI) {
7944 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7945 			rdev->rlc.reg_list_size =
7946 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7947 		} else {
7948 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7949 			rdev->rlc.reg_list_size =
7950 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7951 		}
7952 	}
7953 	rdev->rlc.cs_data = ci_cs_data;
7954 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7955 	r = sumo_rlc_init(rdev);
7956 	if (r) {
7957 		DRM_ERROR("Failed to init rlc BOs!\n");
7958 		return r;
7959 	}
7960 
7961 	/* allocate wb buffer */
7962 	r = radeon_wb_init(rdev);
7963 	if (r)
7964 		return r;
7965 
7966 	/* allocate mec buffers */
7967 	r = cik_mec_init(rdev);
7968 	if (r) {
7969 		DRM_ERROR("Failed to init MEC BOs!\n");
7970 		return r;
7971 	}
7972 
7973 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7974 	if (r) {
7975 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7976 		return r;
7977 	}
7978 
7979 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7980 	if (r) {
7981 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7982 		return r;
7983 	}
7984 
7985 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7986 	if (r) {
7987 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7988 		return r;
7989 	}
7990 
7991 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7992 	if (r) {
7993 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7994 		return r;
7995 	}
7996 
7997 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7998 	if (r) {
7999 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8000 		return r;
8001 	}
8002 
8003 	r = radeon_uvd_resume(rdev);
8004 	if (!r) {
8005 		r = uvd_v4_2_resume(rdev);
8006 		if (!r) {
8007 			r = radeon_fence_driver_start_ring(rdev,
8008 							   R600_RING_TYPE_UVD_INDEX);
8009 			if (r)
8010 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8011 		}
8012 	}
8013 	if (r)
8014 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8015 
8016 	r = radeon_vce_resume(rdev);
8017 	if (!r) {
8018 		r = vce_v2_0_resume(rdev);
8019 		if (!r)
8020 			r = radeon_fence_driver_start_ring(rdev,
8021 							   TN_RING_TYPE_VCE1_INDEX);
8022 		if (!r)
8023 			r = radeon_fence_driver_start_ring(rdev,
8024 							   TN_RING_TYPE_VCE2_INDEX);
8025 	}
8026 	if (r) {
8027 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8028 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8029 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8030 	}
8031 
8032 	/* Enable IRQ */
8033 	if (!rdev->irq.installed) {
8034 		r = radeon_irq_kms_init(rdev);
8035 		if (r)
8036 			return r;
8037 	}
8038 
8039 	r = cik_irq_init(rdev);
8040 	if (r) {
8041 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8042 		radeon_irq_kms_fini(rdev);
8043 		return r;
8044 	}
8045 	cik_irq_set(rdev);
8046 
8047 	if (rdev->family == CHIP_HAWAII) {
8048 		nop = RADEON_CP_PACKET2;
8049 	} else {
8050 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8051 	}
8052 
8053 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8054 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8055 			     nop);
8056 	if (r)
8057 		return r;
8058 
8059 	/* set up the compute queues */
8060 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8061 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8062 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8063 			     nop);
8064 	if (r)
8065 		return r;
8066 	ring->me = 1; /* first MEC */
8067 	ring->pipe = 0; /* first pipe */
8068 	ring->queue = 0; /* first queue */
8069 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8070 
8071 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8072 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8073 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8074 			     nop);
8075 	if (r)
8076 		return r;
8077 	/* dGPU only have 1 MEC */
8078 	ring->me = 1; /* first MEC */
8079 	ring->pipe = 0; /* first pipe */
8080 	ring->queue = 1; /* second queue */
8081 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8082 
8083 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8084 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8085 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8086 	if (r)
8087 		return r;
8088 
8089 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8090 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8091 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8092 	if (r)
8093 		return r;
8094 
8095 	r = cik_cp_resume(rdev);
8096 	if (r)
8097 		return r;
8098 
8099 	r = cik_sdma_resume(rdev);
8100 	if (r)
8101 		return r;
8102 
8103 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8104 	if (ring->ring_size) {
8105 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8106 				     RADEON_CP_PACKET2);
8107 		if (!r)
8108 			r = uvd_v1_0_init(rdev);
8109 		if (r)
8110 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8111 	}
8112 
8113 	r = -ENOENT;
8114 
8115 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8116 	if (ring->ring_size)
8117 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8118 				     VCE_CMD_NO_OP);
8119 
8120 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8121 	if (ring->ring_size)
8122 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8123 				     VCE_CMD_NO_OP);
8124 
8125 	if (!r)
8126 		r = vce_v1_0_init(rdev);
8127 	else if (r != -ENOENT)
8128 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8129 
8130 	r = radeon_ib_pool_init(rdev);
8131 	if (r) {
8132 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8133 		return r;
8134 	}
8135 
8136 	r = radeon_vm_manager_init(rdev);
8137 	if (r) {
8138 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8139 		return r;
8140 	}
8141 
8142 	r = dce6_audio_init(rdev);
8143 	if (r)
8144 		return r;
8145 
8146 	return 0;
8147 }
8148 
8149 /**
8150  * cik_resume - resume the asic to a functional state
8151  *
8152  * @rdev: radeon_device pointer
8153  *
8154  * Programs the asic to a functional state (CIK).
8155  * Called at resume.
8156  * Returns 0 for success, error for failure.
8157  */
8158 int cik_resume(struct radeon_device *rdev)
8159 {
8160 	int r;
8161 
8162 	/* post card */
8163 	atom_asic_init(rdev->mode_info.atom_context);
8164 
8165 	/* init golden registers */
8166 	cik_init_golden_registers(rdev);
8167 
8168 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8169 		radeon_pm_resume(rdev);
8170 
8171 	rdev->accel_working = true;
8172 	r = cik_startup(rdev);
8173 	if (r) {
8174 		DRM_ERROR("cik startup failed on resume\n");
8175 		rdev->accel_working = false;
8176 		return r;
8177 	}
8178 
8179 	return r;
8180 
8181 }
8182 
8183 /**
8184  * cik_suspend - suspend the asic
8185  *
8186  * @rdev: radeon_device pointer
8187  *
8188  * Bring the chip into a state suitable for suspend (CIK).
8189  * Called at suspend.
8190  * Returns 0 for success.
8191  */
8192 int cik_suspend(struct radeon_device *rdev)
8193 {
8194 	radeon_pm_suspend(rdev);
8195 	dce6_audio_fini(rdev);
8196 	radeon_vm_manager_fini(rdev);
8197 	cik_cp_enable(rdev, false);
8198 	cik_sdma_enable(rdev, false);
8199 	uvd_v1_0_fini(rdev);
8200 	radeon_uvd_suspend(rdev);
8201 	radeon_vce_suspend(rdev);
8202 	cik_fini_pg(rdev);
8203 	cik_fini_cg(rdev);
8204 	cik_irq_suspend(rdev);
8205 	radeon_wb_disable(rdev);
8206 	cik_pcie_gart_disable(rdev);
8207 	return 0;
8208 }
8209 
8210 /* Plan is to move initialization in that function and use
8211  * helper function so that radeon_device_init pretty much
8212  * do nothing more than calling asic specific function. This
8213  * should also allow to remove a bunch of callback function
8214  * like vram_info.
8215  */
8216 /**
8217  * cik_init - asic specific driver and hw init
8218  *
8219  * @rdev: radeon_device pointer
8220  *
8221  * Setup asic specific driver variables and program the hw
8222  * to a functional state (CIK).
8223  * Called at driver startup.
8224  * Returns 0 for success, errors for failure.
8225  */
8226 int cik_init(struct radeon_device *rdev)
8227 {
8228 	struct radeon_ring *ring;
8229 	int r;
8230 
8231 	/* Read BIOS */
8232 	if (!radeon_get_bios(rdev)) {
8233 		if (ASIC_IS_AVIVO(rdev))
8234 			return -EINVAL;
8235 	}
8236 	/* Must be an ATOMBIOS */
8237 	if (!rdev->is_atom_bios) {
8238 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8239 		return -EINVAL;
8240 	}
8241 	r = radeon_atombios_init(rdev);
8242 	if (r)
8243 		return r;
8244 
8245 	/* Post card if necessary */
8246 	if (!radeon_card_posted(rdev)) {
8247 		if (!rdev->bios) {
8248 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8249 			return -EINVAL;
8250 		}
8251 		DRM_INFO("GPU not posted. posting now...\n");
8252 		atom_asic_init(rdev->mode_info.atom_context);
8253 	}
8254 	/* init golden registers */
8255 	cik_init_golden_registers(rdev);
8256 	/* Initialize scratch registers */
8257 	cik_scratch_init(rdev);
8258 	/* Initialize surface registers */
8259 	radeon_surface_init(rdev);
8260 	/* Initialize clocks */
8261 	radeon_get_clock_info(rdev->ddev);
8262 
8263 	/* Fence driver */
8264 	r = radeon_fence_driver_init(rdev);
8265 	if (r)
8266 		return r;
8267 
8268 	/* initialize memory controller */
8269 	r = cik_mc_init(rdev);
8270 	if (r)
8271 		return r;
8272 	/* Memory manager */
8273 	r = radeon_bo_init(rdev);
8274 	if (r)
8275 		return r;
8276 
8277 	if (rdev->flags & RADEON_IS_IGP) {
8278 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8279 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8280 			r = cik_init_microcode(rdev);
8281 			if (r) {
8282 				DRM_ERROR("Failed to load firmware!\n");
8283 				return r;
8284 			}
8285 		}
8286 	} else {
8287 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8288 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8289 		    !rdev->mc_fw) {
8290 			r = cik_init_microcode(rdev);
8291 			if (r) {
8292 				DRM_ERROR("Failed to load firmware!\n");
8293 				return r;
8294 			}
8295 		}
8296 	}
8297 
8298 	/* Initialize power management */
8299 	radeon_pm_init(rdev);
8300 
8301 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8302 	ring->ring_obj = NULL;
8303 	r600_ring_init(rdev, ring, 1024 * 1024);
8304 
8305 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8306 	ring->ring_obj = NULL;
8307 	r600_ring_init(rdev, ring, 1024 * 1024);
8308 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8309 	if (r)
8310 		return r;
8311 
8312 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8313 	ring->ring_obj = NULL;
8314 	r600_ring_init(rdev, ring, 1024 * 1024);
8315 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8316 	if (r)
8317 		return r;
8318 
8319 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8320 	ring->ring_obj = NULL;
8321 	r600_ring_init(rdev, ring, 256 * 1024);
8322 
8323 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8324 	ring->ring_obj = NULL;
8325 	r600_ring_init(rdev, ring, 256 * 1024);
8326 
8327 	r = radeon_uvd_init(rdev);
8328 	if (!r) {
8329 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8330 		ring->ring_obj = NULL;
8331 		r600_ring_init(rdev, ring, 4096);
8332 	}
8333 
8334 	r = radeon_vce_init(rdev);
8335 	if (!r) {
8336 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8337 		ring->ring_obj = NULL;
8338 		r600_ring_init(rdev, ring, 4096);
8339 
8340 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8341 		ring->ring_obj = NULL;
8342 		r600_ring_init(rdev, ring, 4096);
8343 	}
8344 
8345 	rdev->ih.ring_obj = NULL;
8346 	r600_ih_ring_init(rdev, 64 * 1024);
8347 
8348 	r = r600_pcie_gart_init(rdev);
8349 	if (r)
8350 		return r;
8351 
8352 	rdev->accel_working = true;
8353 	r = cik_startup(rdev);
8354 	if (r) {
8355 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8356 		cik_cp_fini(rdev);
8357 		cik_sdma_fini(rdev);
8358 		cik_irq_fini(rdev);
8359 		sumo_rlc_fini(rdev);
8360 		cik_mec_fini(rdev);
8361 		radeon_wb_fini(rdev);
8362 		radeon_ib_pool_fini(rdev);
8363 		radeon_vm_manager_fini(rdev);
8364 		radeon_irq_kms_fini(rdev);
8365 		cik_pcie_gart_fini(rdev);
8366 		rdev->accel_working = false;
8367 	}
8368 
8369 	/* Don't start up if the MC ucode is missing.
8370 	 * The default clocks and voltages before the MC ucode
8371 	 * is loaded are not suffient for advanced operations.
8372 	 */
8373 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8374 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8375 		return -EINVAL;
8376 	}
8377 
8378 	return 0;
8379 }
8380 
8381 /**
8382  * cik_fini - asic specific driver and hw fini
8383  *
8384  * @rdev: radeon_device pointer
8385  *
8386  * Tear down the asic specific driver variables and program the hw
8387  * to an idle state (CIK).
8388  * Called at driver unload.
8389  */
8390 void cik_fini(struct radeon_device *rdev)
8391 {
8392 	radeon_pm_fini(rdev);
8393 	cik_cp_fini(rdev);
8394 	cik_sdma_fini(rdev);
8395 	cik_fini_pg(rdev);
8396 	cik_fini_cg(rdev);
8397 	cik_irq_fini(rdev);
8398 	sumo_rlc_fini(rdev);
8399 	cik_mec_fini(rdev);
8400 	radeon_wb_fini(rdev);
8401 	radeon_vm_manager_fini(rdev);
8402 	radeon_ib_pool_fini(rdev);
8403 	radeon_irq_kms_fini(rdev);
8404 	uvd_v1_0_fini(rdev);
8405 	radeon_uvd_fini(rdev);
8406 	radeon_vce_fini(rdev);
8407 	cik_pcie_gart_fini(rdev);
8408 	r600_vram_scratch_fini(rdev);
8409 	radeon_gem_fini(rdev);
8410 	radeon_fence_driver_fini(rdev);
8411 	radeon_bo_fini(rdev);
8412 	radeon_atombios_fini(rdev);
8413 	kfree(rdev->bios);
8414 	rdev->bios = NULL;
8415 }
8416 
8417 void dce8_program_fmt(struct drm_encoder *encoder)
8418 {
8419 	struct drm_device *dev = encoder->dev;
8420 	struct radeon_device *rdev = dev->dev_private;
8421 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8422 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8423 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8424 	int bpc = 0;
8425 	u32 tmp = 0;
8426 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8427 
8428 	if (connector) {
8429 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8430 		bpc = radeon_get_monitor_bpc(connector);
8431 		dither = radeon_connector->dither;
8432 	}
8433 
8434 	/* LVDS/eDP FMT is set up by atom */
8435 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8436 		return;
8437 
8438 	/* not needed for analog */
8439 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8440 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8441 		return;
8442 
8443 	if (bpc == 0)
8444 		return;
8445 
8446 	switch (bpc) {
8447 	case 6:
8448 		if (dither == RADEON_FMT_DITHER_ENABLE)
8449 			/* XXX sort out optimal dither settings */
8450 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8451 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8452 		else
8453 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8454 		break;
8455 	case 8:
8456 		if (dither == RADEON_FMT_DITHER_ENABLE)
8457 			/* XXX sort out optimal dither settings */
8458 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8459 				FMT_RGB_RANDOM_ENABLE |
8460 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8461 		else
8462 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8463 		break;
8464 	case 10:
8465 		if (dither == RADEON_FMT_DITHER_ENABLE)
8466 			/* XXX sort out optimal dither settings */
8467 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8468 				FMT_RGB_RANDOM_ENABLE |
8469 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8470 		else
8471 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8472 		break;
8473 	default:
8474 		/* not needed */
8475 		break;
8476 	}
8477 
8478 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8479 }
8480 
8481 /* display watermark setup */
8482 /**
8483  * dce8_line_buffer_adjust - Set up the line buffer
8484  *
8485  * @rdev: radeon_device pointer
8486  * @radeon_crtc: the selected display controller
8487  * @mode: the current display mode on the selected display
8488  * controller
8489  *
8490  * Setup up the line buffer allocation for
8491  * the selected display controller (CIK).
8492  * Returns the line buffer size in pixels.
8493  */
8494 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8495 				   struct radeon_crtc *radeon_crtc,
8496 				   struct drm_display_mode *mode)
8497 {
8498 	u32 tmp, buffer_alloc, i;
8499 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8500 	/*
8501 	 * Line Buffer Setup
8502 	 * There are 6 line buffers, one for each display controllers.
8503 	 * There are 3 partitions per LB. Select the number of partitions
8504 	 * to enable based on the display width.  For display widths larger
8505 	 * than 4096, you need use to use 2 display controllers and combine
8506 	 * them using the stereo blender.
8507 	 */
8508 	if (radeon_crtc->base.enabled && mode) {
8509 		if (mode->crtc_hdisplay < 1920) {
8510 			tmp = 1;
8511 			buffer_alloc = 2;
8512 		} else if (mode->crtc_hdisplay < 2560) {
8513 			tmp = 2;
8514 			buffer_alloc = 2;
8515 		} else if (mode->crtc_hdisplay < 4096) {
8516 			tmp = 0;
8517 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8518 		} else {
8519 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8520 			tmp = 0;
8521 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8522 		}
8523 	} else {
8524 		tmp = 1;
8525 		buffer_alloc = 0;
8526 	}
8527 
8528 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8529 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8530 
8531 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8532 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8533 	for (i = 0; i < rdev->usec_timeout; i++) {
8534 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8535 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8536 			break;
8537 		udelay(1);
8538 	}
8539 
8540 	if (radeon_crtc->base.enabled && mode) {
8541 		switch (tmp) {
8542 		case 0:
8543 		default:
8544 			return 4096 * 2;
8545 		case 1:
8546 			return 1920 * 2;
8547 		case 2:
8548 			return 2560 * 2;
8549 		}
8550 	}
8551 
8552 	/* controller not enabled, so no lb used */
8553 	return 0;
8554 }
8555 
8556 /**
8557  * cik_get_number_of_dram_channels - get the number of dram channels
8558  *
8559  * @rdev: radeon_device pointer
8560  *
8561  * Look up the number of video ram channels (CIK).
8562  * Used for display watermark bandwidth calculations
8563  * Returns the number of dram channels
8564  */
8565 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8566 {
8567 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8568 
8569 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8570 	case 0:
8571 	default:
8572 		return 1;
8573 	case 1:
8574 		return 2;
8575 	case 2:
8576 		return 4;
8577 	case 3:
8578 		return 8;
8579 	case 4:
8580 		return 3;
8581 	case 5:
8582 		return 6;
8583 	case 6:
8584 		return 10;
8585 	case 7:
8586 		return 12;
8587 	case 8:
8588 		return 16;
8589 	}
8590 }
8591 
/*
 * Input parameters for the DCE8 display watermark calculations;
 * consumed by the dce8_*_bandwidth() helpers below.
 */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8607 
8608 /**
8609  * dce8_dram_bandwidth - get the dram bandwidth
8610  *
8611  * @wm: watermark calculation data
8612  *
8613  * Calculate the raw dram bandwidth (CIK).
8614  * Used for display watermark bandwidth calculations
8615  * Returns the dram bandwidth in MBytes/s
8616  */
8617 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8618 {
8619 	/* Calculate raw DRAM Bandwidth */
8620 	fixed20_12 dram_efficiency; /* 0.7 */
8621 	fixed20_12 yclk, dram_channels, bandwidth;
8622 	fixed20_12 a;
8623 
8624 	a.full = dfixed_const(1000);
8625 	yclk.full = dfixed_const(wm->yclk);
8626 	yclk.full = dfixed_div(yclk, a);
8627 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8628 	a.full = dfixed_const(10);
8629 	dram_efficiency.full = dfixed_const(7);
8630 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8631 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8632 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8633 
8634 	return dfixed_trunc(bandwidth);
8635 }
8636 
8637 /**
8638  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8639  *
8640  * @wm: watermark calculation data
8641  *
8642  * Calculate the dram bandwidth used for display (CIK).
8643  * Used for display watermark bandwidth calculations
8644  * Returns the dram bandwidth for display in MBytes/s
8645  */
8646 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8647 {
8648 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8649 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8650 	fixed20_12 yclk, dram_channels, bandwidth;
8651 	fixed20_12 a;
8652 
8653 	a.full = dfixed_const(1000);
8654 	yclk.full = dfixed_const(wm->yclk);
8655 	yclk.full = dfixed_div(yclk, a);
8656 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8657 	a.full = dfixed_const(10);
8658 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8659 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8660 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8661 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8662 
8663 	return dfixed_trunc(bandwidth);
8664 }
8665 
8666 /**
8667  * dce8_data_return_bandwidth - get the data return bandwidth
8668  *
8669  * @wm: watermark calculation data
8670  *
8671  * Calculate the data return bandwidth used for display (CIK).
8672  * Used for display watermark bandwidth calculations
8673  * Returns the data return bandwidth in MBytes/s
8674  */
8675 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8676 {
8677 	/* Calculate the display Data return Bandwidth */
8678 	fixed20_12 return_efficiency; /* 0.8 */
8679 	fixed20_12 sclk, bandwidth;
8680 	fixed20_12 a;
8681 
8682 	a.full = dfixed_const(1000);
8683 	sclk.full = dfixed_const(wm->sclk);
8684 	sclk.full = dfixed_div(sclk, a);
8685 	a.full = dfixed_const(10);
8686 	return_efficiency.full = dfixed_const(8);
8687 	return_efficiency.full = dfixed_div(return_efficiency, a);
8688 	a.full = dfixed_const(32);
8689 	bandwidth.full = dfixed_mul(a, sclk);
8690 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8691 
8692 	return dfixed_trunc(bandwidth);
8693 }
8694 
8695 /**
8696  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8697  *
8698  * @wm: watermark calculation data
8699  *
8700  * Calculate the dmif bandwidth used for display (CIK).
8701  * Used for display watermark bandwidth calculations
8702  * Returns the dmif bandwidth in MBytes/s
8703  */
8704 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8705 {
8706 	/* Calculate the DMIF Request Bandwidth */
8707 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8708 	fixed20_12 disp_clk, bandwidth;
8709 	fixed20_12 a, b;
8710 
8711 	a.full = dfixed_const(1000);
8712 	disp_clk.full = dfixed_const(wm->disp_clk);
8713 	disp_clk.full = dfixed_div(disp_clk, a);
8714 	a.full = dfixed_const(32);
8715 	b.full = dfixed_mul(a, disp_clk);
8716 
8717 	a.full = dfixed_const(10);
8718 	disp_clk_request_efficiency.full = dfixed_const(8);
8719 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8720 
8721 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8722 
8723 	return dfixed_trunc(bandwidth);
8724 }
8725 
8726 /**
8727  * dce8_available_bandwidth - get the min available bandwidth
8728  *
8729  * @wm: watermark calculation data
8730  *
8731  * Calculate the min available bandwidth used for display (CIK).
8732  * Used for display watermark bandwidth calculations
8733  * Returns the min available bandwidth in MBytes/s
8734  */
8735 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8736 {
8737 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8738 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8739 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8740 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8741 
8742 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8743 }
8744 
8745 /**
8746  * dce8_average_bandwidth - get the average available bandwidth
8747  *
8748  * @wm: watermark calculation data
8749  *
8750  * Calculate the average available bandwidth used for display (CIK).
8751  * Used for display watermark bandwidth calculations
8752  * Returns the average available bandwidth in MBytes/s
8753  */
8754 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8755 {
8756 	/* Calculate the display mode Average Bandwidth
8757 	 * DisplayMode should contain the source and destination dimensions,
8758 	 * timing, etc.
8759 	 */
8760 	fixed20_12 bpp;
8761 	fixed20_12 line_time;
8762 	fixed20_12 src_width;
8763 	fixed20_12 bandwidth;
8764 	fixed20_12 a;
8765 
8766 	a.full = dfixed_const(1000);
8767 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8768 	line_time.full = dfixed_div(line_time, a);
8769 	bpp.full = dfixed_const(wm->bytes_per_pixel);
8770 	src_width.full = dfixed_const(wm->src_width);
8771 	bandwidth.full = dfixed_mul(src_width, bpp);
8772 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8773 	bandwidth.full = dfixed_div(bandwidth, line_time);
8774 
8775 	return dfixed_trunc(bandwidth);
8776 }
8777 
8778 /**
8779  * dce8_latency_watermark - get the latency watermark
8780  *
8781  * @wm: watermark calculation data
8782  *
8783  * Calculate the latency watermark (CIK).
8784  * Used for display watermark bandwidth calculations
8785  * Returns the latency watermark in ns
8786  */
8787 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8788 {
8789 	/* First calculate the latency in ns */
8790 	u32 mc_latency = 2000; /* 2000 ns. */
8791 	u32 available_bandwidth = dce8_available_bandwidth(wm);
8792 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8793 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8794 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8795 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8796 		(wm->num_heads * cursor_line_pair_return_time);
8797 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8798 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8799 	u32 tmp, dmif_size = 12288;
8800 	fixed20_12 a, b, c;
8801 
8802 	if (wm->num_heads == 0)
8803 		return 0;
8804 
8805 	a.full = dfixed_const(2);
8806 	b.full = dfixed_const(1);
8807 	if ((wm->vsc.full > a.full) ||
8808 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8809 	    (wm->vtaps >= 5) ||
8810 	    ((wm->vsc.full >= a.full) && wm->interlaced))
8811 		max_src_lines_per_dst_line = 4;
8812 	else
8813 		max_src_lines_per_dst_line = 2;
8814 
8815 	a.full = dfixed_const(available_bandwidth);
8816 	b.full = dfixed_const(wm->num_heads);
8817 	a.full = dfixed_div(a, b);
8818 
8819 	b.full = dfixed_const(mc_latency + 512);
8820 	c.full = dfixed_const(wm->disp_clk);
8821 	b.full = dfixed_div(b, c);
8822 
8823 	c.full = dfixed_const(dmif_size);
8824 	b.full = dfixed_div(c, b);
8825 
8826 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8827 
8828 	b.full = dfixed_const(1000);
8829 	c.full = dfixed_const(wm->disp_clk);
8830 	b.full = dfixed_div(c, b);
8831 	c.full = dfixed_const(wm->bytes_per_pixel);
8832 	b.full = dfixed_mul(b, c);
8833 
8834 	lb_fill_bw = min(tmp, dfixed_trunc(b));
8835 
8836 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8837 	b.full = dfixed_const(1000);
8838 	c.full = dfixed_const(lb_fill_bw);
8839 	b.full = dfixed_div(c, b);
8840 	a.full = dfixed_div(a, b);
8841 	line_fill_time = dfixed_trunc(a);
8842 
8843 	if (line_fill_time < wm->active_time)
8844 		return latency;
8845 	else
8846 		return latency + (line_fill_time - wm->active_time);
8847 
8848 }
8849 
8850 /**
8851  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8852  * average and available dram bandwidth
8853  *
8854  * @wm: watermark calculation data
8855  *
8856  * Check if the display average bandwidth fits in the display
8857  * dram bandwidth (CIK).
8858  * Used for display watermark bandwidth calculations
8859  * Returns true if the display fits, false if not.
8860  */
8861 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8862 {
8863 	if (dce8_average_bandwidth(wm) <=
8864 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8865 		return true;
8866 	else
8867 		return false;
8868 }
8869 
8870 /**
8871  * dce8_average_bandwidth_vs_available_bandwidth - check
8872  * average and available bandwidth
8873  *
8874  * @wm: watermark calculation data
8875  *
8876  * Check if the display average bandwidth fits in the display
8877  * available bandwidth (CIK).
8878  * Used for display watermark bandwidth calculations
8879  * Returns true if the display fits, false if not.
8880  */
8881 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8882 {
8883 	if (dce8_average_bandwidth(wm) <=
8884 	    (dce8_available_bandwidth(wm) / wm->num_heads))
8885 		return true;
8886 	else
8887 		return false;
8888 }
8889 
8890 /**
8891  * dce8_check_latency_hiding - check latency hiding
8892  *
8893  * @wm: watermark calculation data
8894  *
8895  * Check latency hiding (CIK).
8896  * Used for display watermark bandwidth calculations
8897  * Returns true if the display fits, false if not.
8898  */
8899 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8900 {
8901 	u32 lb_partitions = wm->lb_size / wm->src_width;
8902 	u32 line_time = wm->active_time + wm->blank_time;
8903 	u32 latency_tolerant_lines;
8904 	u32 latency_hiding;
8905 	fixed20_12 a;
8906 
8907 	a.full = dfixed_const(1);
8908 	if (wm->vsc.full > a.full)
8909 		latency_tolerant_lines = 1;
8910 	else {
8911 		if (lb_partitions <= (wm->vtaps + 1))
8912 			latency_tolerant_lines = 1;
8913 		else
8914 			latency_tolerant_lines = 2;
8915 	}
8916 
8917 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8918 
8919 	if (dce8_latency_watermark(wm) <= latency_hiding)
8920 		return true;
8921 	else
8922 		return false;
8923 }
8924 
8925 /**
8926  * dce8_program_watermarks - program display watermarks
8927  *
8928  * @rdev: radeon_device pointer
8929  * @radeon_crtc: the selected display controller
8930  * @lb_size: line buffer size
8931  * @num_heads: number of display controllers in use
8932  *
8933  * Calculate and program the display watermarks for the
8934  * selected display controller (CIK).
8935  */
8936 static void dce8_program_watermarks(struct radeon_device *rdev,
8937 				    struct radeon_crtc *radeon_crtc,
8938 				    u32 lb_size, u32 num_heads)
8939 {
8940 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
8941 	struct dce8_wm_params wm_low, wm_high;
8942 	u32 pixel_period;
8943 	u32 line_time = 0;
8944 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
8945 	u32 tmp, wm_mask;
8946 
8947 	if (radeon_crtc->base.enabled && num_heads && mode) {
8948 		pixel_period = 1000000 / (u32)mode->clock;
8949 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8950 
8951 		/* watermark for high clocks */
8952 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8953 		    rdev->pm.dpm_enabled) {
8954 			wm_high.yclk =
8955 				radeon_dpm_get_mclk(rdev, false) * 10;
8956 			wm_high.sclk =
8957 				radeon_dpm_get_sclk(rdev, false) * 10;
8958 		} else {
8959 			wm_high.yclk = rdev->pm.current_mclk * 10;
8960 			wm_high.sclk = rdev->pm.current_sclk * 10;
8961 		}
8962 
8963 		wm_high.disp_clk = mode->clock;
8964 		wm_high.src_width = mode->crtc_hdisplay;
8965 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8966 		wm_high.blank_time = line_time - wm_high.active_time;
8967 		wm_high.interlaced = false;
8968 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8969 			wm_high.interlaced = true;
8970 		wm_high.vsc = radeon_crtc->vsc;
8971 		wm_high.vtaps = 1;
8972 		if (radeon_crtc->rmx_type != RMX_OFF)
8973 			wm_high.vtaps = 2;
8974 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8975 		wm_high.lb_size = lb_size;
8976 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8977 		wm_high.num_heads = num_heads;
8978 
8979 		/* set for high clocks */
8980 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8981 
8982 		/* possibly force display priority to high */
8983 		/* should really do this at mode validation time... */
8984 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8985 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8986 		    !dce8_check_latency_hiding(&wm_high) ||
8987 		    (rdev->disp_priority == 2)) {
8988 			DRM_DEBUG_KMS("force priority to high\n");
8989 		}
8990 
8991 		/* watermark for low clocks */
8992 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8993 		    rdev->pm.dpm_enabled) {
8994 			wm_low.yclk =
8995 				radeon_dpm_get_mclk(rdev, true) * 10;
8996 			wm_low.sclk =
8997 				radeon_dpm_get_sclk(rdev, true) * 10;
8998 		} else {
8999 			wm_low.yclk = rdev->pm.current_mclk * 10;
9000 			wm_low.sclk = rdev->pm.current_sclk * 10;
9001 		}
9002 
9003 		wm_low.disp_clk = mode->clock;
9004 		wm_low.src_width = mode->crtc_hdisplay;
9005 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9006 		wm_low.blank_time = line_time - wm_low.active_time;
9007 		wm_low.interlaced = false;
9008 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9009 			wm_low.interlaced = true;
9010 		wm_low.vsc = radeon_crtc->vsc;
9011 		wm_low.vtaps = 1;
9012 		if (radeon_crtc->rmx_type != RMX_OFF)
9013 			wm_low.vtaps = 2;
9014 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9015 		wm_low.lb_size = lb_size;
9016 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9017 		wm_low.num_heads = num_heads;
9018 
9019 		/* set for low clocks */
9020 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9021 
9022 		/* possibly force display priority to high */
9023 		/* should really do this at mode validation time... */
9024 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9025 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9026 		    !dce8_check_latency_hiding(&wm_low) ||
9027 		    (rdev->disp_priority == 2)) {
9028 			DRM_DEBUG_KMS("force priority to high\n");
9029 		}
9030 	}
9031 
9032 	/* select wm A */
9033 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9034 	tmp = wm_mask;
9035 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9036 	tmp |= LATENCY_WATERMARK_MASK(1);
9037 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9038 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9039 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9040 		LATENCY_HIGH_WATERMARK(line_time)));
9041 	/* select wm B */
9042 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9043 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9044 	tmp |= LATENCY_WATERMARK_MASK(2);
9045 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9046 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9047 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9048 		LATENCY_HIGH_WATERMARK(line_time)));
9049 	/* restore original selection */
9050 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9051 
9052 	/* save values for DPM */
9053 	radeon_crtc->line_time = line_time;
9054 	radeon_crtc->wm_high = latency_watermark_a;
9055 	radeon_crtc->wm_low = latency_watermark_b;
9056 }
9057 
9058 /**
9059  * dce8_bandwidth_update - program display watermarks
9060  *
9061  * @rdev: radeon_device pointer
9062  *
9063  * Calculate and program the display watermarks and line
9064  * buffer allocation (CIK).
9065  */
9066 void dce8_bandwidth_update(struct radeon_device *rdev)
9067 {
9068 	struct drm_display_mode *mode = NULL;
9069 	u32 num_heads = 0, lb_size;
9070 	int i;
9071 
9072 	if (!rdev->mode_info.mode_config_initialized)
9073 		return;
9074 
9075 	radeon_update_display_priority(rdev);
9076 
9077 	for (i = 0; i < rdev->num_crtc; i++) {
9078 		if (rdev->mode_info.crtcs[i]->base.enabled)
9079 			num_heads++;
9080 	}
9081 	for (i = 0; i < rdev->num_crtc; i++) {
9082 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9083 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9084 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9085 	}
9086 }
9087 
9088 /**
9089  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9090  *
9091  * @rdev: radeon_device pointer
9092  *
9093  * Fetches a GPU clock counter snapshot (SI).
9094  * Returns the 64 bit clock counter snapshot.
9095  */
9096 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9097 {
9098 	uint64_t clock;
9099 
9100 	spin_lock(&rdev->gpu_clock_mutex);
9101 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9102 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9103 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9104 	spin_unlock(&rdev->gpu_clock_mutex);
9105 	return clock;
9106 }
9107 
9108 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9109                               u32 cntl_reg, u32 status_reg)
9110 {
9111 	int r, i;
9112 	struct atom_clock_dividers dividers;
9113 	uint32_t tmp;
9114 
9115 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9116 					   clock, false, &dividers);
9117 	if (r)
9118 		return r;
9119 
9120 	tmp = RREG32_SMC(cntl_reg);
9121 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9122 	tmp |= dividers.post_divider;
9123 	WREG32_SMC(cntl_reg, tmp);
9124 
9125 	for (i = 0; i < 100; i++) {
9126 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9127 			break;
9128 		mdelay(10);
9129 	}
9130 	if (i == 100)
9131 		return -ETIMEDOUT;
9132 
9133 	return 0;
9134 }
9135 
9136 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9137 {
9138 	int r = 0;
9139 
9140 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9141 	if (r)
9142 		return r;
9143 
9144 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9145 	return r;
9146 }
9147 
9148 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9149 {
9150 	int r, i;
9151 	struct atom_clock_dividers dividers;
9152 	u32 tmp;
9153 
9154 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9155 					   ecclk, false, &dividers);
9156 	if (r)
9157 		return r;
9158 
9159 	for (i = 0; i < 100; i++) {
9160 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9161 			break;
9162 		mdelay(10);
9163 	}
9164 	if (i == 100)
9165 		return -ETIMEDOUT;
9166 
9167 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9168 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9169 	tmp |= dividers.post_divider;
9170 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9171 
9172 	for (i = 0; i < 100; i++) {
9173 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9174 			break;
9175 		mdelay(10);
9176 	}
9177 	if (i == 100)
9178 		return -ETIMEDOUT;
9179 
9180 	return 0;
9181 }
9182 
/* Attempt to bring the PCIe link up to gen2/gen3 speed, depending on what
 * both the GPU and the upstream bridge support.  For gen3 the link
 * equalization is re-run first.  Can be disabled with radeon.pcie_gen2=0.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* user disabled pcie speed changes via module parameter */
	if (radeon_pcie_gen2 == 0)
		return;

	/* IGPs have no external PCIe link to retrain */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* query the speeds supported by the platform */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing faster than gen1 available: nothing to do */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current data rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* locate the PCIe capability blocks of bridge and GPU
	 * (DragonFly: pci_get_pciecap_ptr instead of Linux pci_pcie_cap)
	 */
	bridge_pos = pci_get_pciecap_ptr(root->dev);
	if (!bridge_pos)
		return;

	gpu_pos = pci_get_pciecap_ptr(rdev->pdev->dev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save link control on both ends, then set
			 * hardware-autonomous-width-disable while retraining
			 */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate to the maximum detected link width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* redo link equalization up to 10 times */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* snapshot link control 1/2 on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link and trigger equalization redo */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				/* restore the saved HAWD bits on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore enter-compliance (bit 4) and transmit
				 * margin (bits 9-11) fields from the snapshots
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release the quiesce */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* target link speed (LNKCTL2 bits 3:0): 1 = 2.5GT/s, 2 = 5GT/s, 3 = 8GT/s */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for it to complete */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9339 
/* Configure PCIe Active State Power Management (L0s/L1 entry, PLL
 * power-down in L1, clock-request support).  Can be disabled with the
 * radeon.aspm module parameter.  Each register is only written when the
 * value actually changes.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* user disabled ASPM via module parameter */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the N_FTS (fast training sequence) count */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* program L0s/L1 inactivity timeouts per the disable_* policy above */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PIF PLLs to power down in the off/TXS2 states */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* DragonFly: the bridge CLKPM capability probe is not
			 * ported yet, so clock-request support is always off
			 */
			if (!disable_clkreq) {
#ifdef zMN_TODO
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
#else
				clk_req_support = false;
#endif
			} else {
				clk_req_support = false;
			}

			/* dead code while clk_req_support is forced false above */
			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: just commit the L0s/PMI settings from above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if N_FTS is saturated and the link is reversed in both
		 * directions, turn the L0s inactivity timer back off
		 * (NOTE(review): rationale inferred from the register names)
		 */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
9491