xref: /dragonfly/sys/dev/drm/radeon/cik.c (revision e4710cad)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include "drmP.h"
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include "radeon_audio.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35 #include "radeon_kfd.h"
36 
37 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
46 
47 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
48 MODULE_FIRMWARE("radeon/bonaire_me.bin");
49 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
50 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
51 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
52 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
53 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
54 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
55 
56 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
57 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
65 
66 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
67 MODULE_FIRMWARE("radeon/hawaii_me.bin");
68 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
69 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
70 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
71 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
72 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
73 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
74 
75 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
76 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
80 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
81 
82 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
83 MODULE_FIRMWARE("radeon/kaveri_me.bin");
84 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
85 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
86 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
87 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
88 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
89 
90 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
91 MODULE_FIRMWARE("radeon/KABINI_me.bin");
92 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
93 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
94 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
95 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
96 
97 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
98 MODULE_FIRMWARE("radeon/kabini_me.bin");
99 MODULE_FIRMWARE("radeon/kabini_ce.bin");
100 MODULE_FIRMWARE("radeon/kabini_mec.bin");
101 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
102 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
103 
104 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
105 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
109 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
110 
111 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
112 MODULE_FIRMWARE("radeon/mullins_me.bin");
113 MODULE_FIRMWARE("radeon/mullins_ce.bin");
114 MODULE_FIRMWARE("radeon/mullins_mec.bin");
115 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
116 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
117 
118 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
119 static void cik_rlc_stop(struct radeon_device *rdev);
120 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
121 static void cik_program_aspm(struct radeon_device *rdev);
122 static void cik_init_pg(struct radeon_device *rdev);
123 static void cik_init_cg(struct radeon_device *rdev);
124 static void cik_fini_pg(struct radeon_device *rdev);
125 static void cik_fini_cg(struct radeon_device *rdev);
126 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
127 					  bool enable);
128 
129 /**
130  * cik_get_allowed_info_register - fetch the register for the info ioctl
131  *
132  * @rdev: radeon_device pointer
133  * @reg: register offset in bytes
134  * @val: register value
135  *
136  * Returns 0 for success or -EINVAL for an invalid register
137  *
138  */
139 int cik_get_allowed_info_register(struct radeon_device *rdev,
140 				  u32 reg, u32 *val)
141 {
142 	switch (reg) {
143 	case GRBM_STATUS:
144 	case GRBM_STATUS2:
145 	case GRBM_STATUS_SE0:
146 	case GRBM_STATUS_SE1:
147 	case GRBM_STATUS_SE2:
148 	case GRBM_STATUS_SE3:
149 	case SRBM_STATUS:
150 	case SRBM_STATUS2:
151 	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
152 	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
153 	case UVD_STATUS:
154 	/* TODO VCE */
155 		*val = RREG32(reg);
156 		return 0;
157 	default:
158 		return -EINVAL;
159 	}
160 }
161 
162 /*
163  * Indirect registers accessor
164  */
165 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
166 {
167 	u32 r;
168 
169 	spin_lock(&rdev->didt_idx_lock);
170 	WREG32(CIK_DIDT_IND_INDEX, (reg));
171 	r = RREG32(CIK_DIDT_IND_DATA);
172 	spin_unlock(&rdev->didt_idx_lock);
173 	return r;
174 }
175 
176 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
177 {
178 	spin_lock(&rdev->didt_idx_lock);
179 	WREG32(CIK_DIDT_IND_INDEX, (reg));
180 	WREG32(CIK_DIDT_IND_DATA, (v));
181 	spin_unlock(&rdev->didt_idx_lock);
182 }
183 
184 /* get temperature in millidegrees */
185 int ci_get_temp(struct radeon_device *rdev)
186 {
187 	u32 temp;
188 	int actual_temp = 0;
189 
190 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
191 		CTF_TEMP_SHIFT;
192 
193 	if (temp & 0x200)
194 		actual_temp = 255;
195 	else
196 		actual_temp = temp & 0x1ff;
197 
198 	actual_temp = actual_temp * 1000;
199 
200 	return actual_temp;
201 }
202 
203 /* get temperature in millidegrees */
204 int kv_get_temp(struct radeon_device *rdev)
205 {
206 	u32 temp;
207 	int actual_temp = 0;
208 
209 	temp = RREG32_SMC(0xC0300E0C);
210 
211 	if (temp)
212 		actual_temp = (temp / 8) - 49;
213 	else
214 		actual_temp = 0;
215 
216 	actual_temp = actual_temp * 1000;
217 
218 	return actual_temp;
219 }
220 
221 /*
222  * Indirect registers accessor
223  */
224 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
225 {
226 	u32 r;
227 
228 	spin_lock(&rdev->pciep_idx_lock);
229 	WREG32(PCIE_INDEX, reg);
230 	(void)RREG32(PCIE_INDEX);
231 	r = RREG32(PCIE_DATA);
232 	spin_unlock(&rdev->pciep_idx_lock);
233 	return r;
234 }
235 
236 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
237 {
238 	spin_lock(&rdev->pciep_idx_lock);
239 	WREG32(PCIE_INDEX, reg);
240 	(void)RREG32(PCIE_INDEX);
241 	WREG32(PCIE_DATA, v);
242 	(void)RREG32(PCIE_DATA);
243 	spin_unlock(&rdev->pciep_idx_lock);
244 }
245 
/*
 * RLC save/restore register list for Spectre parts.
 * NOTE(review): each entry reads as a pair of
 *   ((SE/SH/instance select << 16) | (register byte offset >> 2), value)
 * and the bare words 0x3 / 0x5 below look like counts introducing the
 * trailing sections — confirm against the RLC code that walks this table.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3, /* NOTE(review): looks like a section count/marker word -- confirm */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5, /* NOTE(review): looks like a count for the value-less entries below -- confirm */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
692 
/*
 * RLC save/restore register list for Kalindi parts.
 * NOTE(review): same layout as spectre_rlc_save_restore_register_list —
 * pairs of ((SE/SH/instance select << 16) | (register byte offset >> 2),
 * value), with bare count/marker words (0x3, 0x5) before the trailing
 * sections; confirm against the RLC code that walks this table.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3, /* NOTE(review): looks like a section count/marker word -- confirm */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5, /* NOTE(review): looks like a count for the value-less entries below -- confirm */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
1017 
/* Bonaire "golden" SPM register settings.
 * NOTE(review): rows appear to be (offset, mask, value) triplets, as in the
 * other golden-register tables below -- confirm against the code that
 * programs these sequences. */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1022 
/* Bonaire "golden" common register settings.
 * NOTE(review): rows appear to be (offset, mask, value) triplets --
 * confirm against the code that programs these sequences. */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1030 
/* Bonaire "golden" register settings (recommended hw defaults).
 * NOTE(review): rows appear to be (offset, mask, value) triplets --
 * confirm against the code that programs these sequences. */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1075 
/* Bonaire medium/coarse-grain clock gating (MGCG/CGCG) init sequence.
 * Entries appear to be {register offset, AND mask, value} triples,
 * programmed via radeon_program_register_sequence() in
 * cik_init_golden_registers() — TODO confirm triple layout against
 * radeon_program_register_sequence().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1161 
/* Spectre (Kaveri) golden SPM register settings; {offset, mask, value}
 * triples applied last in the CHIP_KAVERI case of
 * cik_init_golden_registers(). */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1166 
/* Spectre (Kaveri) golden common register settings; {offset, mask, value}
 * triples programmed in cik_init_golden_registers(). */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1174 
/* Spectre (Kaveri) golden register settings; {offset, mask, value}
 * triples programmed in cik_init_golden_registers(). */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1203 
/* Spectre (Kaveri) MGCG/CGCG clock-gating init sequence; {offset, mask,
 * value} triples programmed first in the CHIP_KAVERI case of
 * cik_init_golden_registers(). */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1294 
/* Kalindi golden SPM register settings; shared by the CHIP_KABINI and
 * CHIP_MULLINS cases of cik_init_golden_registers(). */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1299 
/* Kalindi golden common register settings; shared by the CHIP_KABINI and
 * CHIP_MULLINS cases of cik_init_golden_registers(). */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1307 
/* Kalindi (Kabini) golden register settings; {offset, mask, value}
 * triples programmed in the CHIP_KABINI case of
 * cik_init_golden_registers(). */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1341 
/* Kalindi MGCG/CGCG clock-gating init sequence; shared by the CHIP_KABINI
 * and CHIP_MULLINS cases of cik_init_golden_registers(). */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1400 
/* Hawaii golden SPM register settings; applied last in the CHIP_HAWAII
 * case of cik_init_golden_registers(). */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1405 
/* Hawaii golden common register settings; {offset, mask, value} triples
 * programmed in cik_init_golden_registers(). */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1414 
/* Hawaii golden register settings; {offset, mask, value} triples
 * programmed in the CHIP_HAWAII case of cik_init_golden_registers(). */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1454 
/* Hawaii MGCG/CGCG clock-gating init sequence; {offset, mask, value}
 * triples programmed first in the CHIP_HAWAII case of
 * cik_init_golden_registers(). */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1565 
/* Godavari (Mullins) golden register settings; {offset, mask, value}
 * triples programmed in the CHIP_MULLINS case of
 * cik_init_golden_registers(). */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 is not dword-aligned; the other golden
	 * tables use 0x9834 with this same mask/value — possible typo
	 * inherited from upstream, confirm before changing. */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1601 
1602 
/* Program the per-ASIC "golden" register tables above into the hardware.
 * For each supported CIK family the tables are applied in a fixed order:
 * clock-gating (mgcg_cgcg) first, then golden, common, and SPM registers.
 * Kabini and Mullins share the kalindi tables; Mullins substitutes its
 * own (godavari) golden table. Unknown families are left untouched. */
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		/* Mullins reuses the kalindi tables except for the golden
		 * registers, which come from the godavari table. */
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		/* no golden registers for other/unknown families */
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}
1683 
1684 /**
1685  * cik_get_xclk - get the xclk
1686  *
1687  * @rdev: radeon_device pointer
1688  *
1689  * Returns the reference clock used by the gfx engine
1690  * (CIK).
1691  */
1692 u32 cik_get_xclk(struct radeon_device *rdev)
1693 {
1694         u32 reference_clock = rdev->clock.spll.reference_freq;
1695 
1696 	if (rdev->flags & RADEON_IS_IGP) {
1697 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1698 			return reference_clock / 2;
1699 	} else {
1700 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1701 			return reference_clock / 4;
1702 	}
1703 	return reference_clock;
1704 }
1705 
1706 /**
1707  * cik_mm_rdoorbell - read a doorbell dword
1708  *
1709  * @rdev: radeon_device pointer
1710  * @index: doorbell index
1711  *
1712  * Returns the value in the doorbell aperture at the
1713  * requested doorbell index (CIK).
1714  */
1715 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1716 {
1717 	if (index < rdev->doorbell.num_doorbells) {
1718 		return readl(rdev->doorbell.ptr + index);
1719 	} else {
1720 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1721 		return 0;
1722 	}
1723 }
1724 
1725 /**
1726  * cik_mm_wdoorbell - write a doorbell dword
1727  *
1728  * @rdev: radeon_device pointer
1729  * @index: doorbell index
1730  * @v: value to write
1731  *
1732  * Writes @v to the doorbell aperture at the
1733  * requested doorbell index (CIK).
1734  */
1735 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1736 {
1737 	if (index < rdev->doorbell.num_doorbells) {
1738 		writel(v, rdev->doorbell.ptr + index);
1739 	} else {
1740 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1741 	}
1742 }
1743 
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC IO debug register pairs {index, data}, written through
 * MC_SEQ_IO_DEBUG_INDEX/DATA by ci_mc_load_microcode() when loading the
 * legacy (non-unified) MC firmware image. */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1785 
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC IO debug register pairs {index, data}, written through
 * MC_SEQ_IO_DEBUG_INDEX/DATA by ci_mc_load_microcode() when loading the
 * legacy (non-unified) MC firmware image. */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1813 
1814 
1815 /**
1816  * cik_srbm_select - select specific register instances
1817  *
1818  * @rdev: radeon_device pointer
1819  * @me: selected ME (micro engine)
1820  * @pipe: pipe
1821  * @queue: queue
1822  * @vmid: VMID
1823  *
1824  * Switches the currently active registers instances.  Some
1825  * registers are instanced per VMID, others are instanced per
1826  * me/pipe/queue combination.
1827  */
1828 static void cik_srbm_select(struct radeon_device *rdev,
1829 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1830 {
1831 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1832 			     MEID(me & 0x3) |
1833 			     VMID(vmid & 0xf) |
1834 			     QUEUEID(queue & 0x7));
1835 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1836 }
1837 
1838 /* ucode loading */
1839 /**
1840  * ci_mc_load_microcode - load MC ucode into the hw
1841  *
1842  * @rdev: radeon_device pointer
1843  *
1844  * Load the GDDR MC ucode into the hw (CIK).
1845  * Returns 0 on success, error on failure.
1846  */
1847 int ci_mc_load_microcode(struct radeon_device *rdev)
1848 {
1849 	const __be32 *fw_data = NULL;
1850 	const __le32 *new_fw_data = NULL;
1851 	u32 running, blackout = 0, tmp;
1852 	u32 *io_mc_regs = NULL;
1853 	const __le32 *new_io_mc_regs = NULL;
1854 	int i, regs_size, ucode_size;
1855 
1856 	if (!rdev->mc_fw)
1857 		return -EINVAL;
1858 
1859 	if (rdev->new_fw) {
1860 		const struct mc_firmware_header_v1_0 *hdr =
1861 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1862 
1863 		radeon_ucode_print_mc_hdr(&hdr->header);
1864 
1865 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1866 		new_io_mc_regs = (const __le32 *)
1867 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1868 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1869 		new_fw_data = (const __le32 *)
1870 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1871 	} else {
1872 		ucode_size = rdev->mc_fw->datasize / 4;
1873 
1874 		switch (rdev->family) {
1875 		case CHIP_BONAIRE:
1876 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1877 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1878 			break;
1879 		case CHIP_HAWAII:
1880 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1881 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1882 			break;
1883 		default:
1884 			return -EINVAL;
1885 		}
1886 		fw_data = (const __be32 *)rdev->mc_fw->data;
1887 	}
1888 
1889 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1890 
1891 	if (running == 0) {
1892 		if (running) {
1893 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1894 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1895 		}
1896 
1897 		/* reset the engine and set to writable */
1898 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1899 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1900 
1901 		/* load mc io regs */
1902 		for (i = 0; i < regs_size; i++) {
1903 			if (rdev->new_fw) {
1904 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1905 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1906 			} else {
1907 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1908 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1909 			}
1910 		}
1911 
1912 		tmp = RREG32(MC_SEQ_MISC0);
1913 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1914 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1915 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1916 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1917 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1918 		}
1919 
1920 		/* load the MC ucode */
1921 		for (i = 0; i < ucode_size; i++) {
1922 			if (rdev->new_fw)
1923 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1924 			else
1925 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1926 		}
1927 
1928 		/* put the engine back into the active state */
1929 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1930 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1931 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1932 
1933 		/* wait for training to complete */
1934 		for (i = 0; i < rdev->usec_timeout; i++) {
1935 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1936 				break;
1937 			udelay(1);
1938 		}
1939 		for (i = 0; i < rdev->usec_timeout; i++) {
1940 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1941 				break;
1942 			udelay(1);
1943 		}
1944 
1945 		if (running)
1946 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1947 	}
1948 
1949 	return 0;
1950 }
1951 
1952 /**
1953  * cik_init_microcode - load ucode images from disk
1954  *
1955  * @rdev: radeon_device pointer
1956  *
1957  * Use the firmware interface to load the ucode images into
1958  * the driver (not loaded into hw).
1959  * Returns 0 on success, error on failure.
1960  */
1961 static int cik_init_microcode(struct radeon_device *rdev)
1962 {
1963 	const char *chip_name;
1964 	const char *new_chip_name;
1965 	size_t pfp_req_size, me_req_size, ce_req_size,
1966 		mec_req_size, rlc_req_size, mc_req_size = 0,
1967 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1968 	char fw_name[30];
1969 	int new_fw = 0;
1970 	int err;
1971 	int num_fw;
1972 
1973 	DRM_DEBUG("\n");
1974 
1975 	switch (rdev->family) {
1976 	case CHIP_BONAIRE:
1977 		chip_name = "BONAIRE";
1978 		new_chip_name = "bonaire";
1979 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1980 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1981 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1982 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1983 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1984 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1985 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1986 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1987 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1988 		num_fw = 8;
1989 		break;
1990 	case CHIP_HAWAII:
1991 		chip_name = "HAWAII";
1992 		new_chip_name = "hawaii";
1993 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1994 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1995 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1996 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1997 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1998 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1999 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2000 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2001 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2002 		num_fw = 8;
2003 		break;
2004 	case CHIP_KAVERI:
2005 		chip_name = "KAVERI";
2006 		new_chip_name = "kaveri";
2007 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2008 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2009 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2010 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2011 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2012 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2013 		num_fw = 7;
2014 		break;
2015 	case CHIP_KABINI:
2016 		chip_name = "KABINI";
2017 		new_chip_name = "kabini";
2018 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2019 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2020 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2021 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2022 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2023 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2024 		num_fw = 6;
2025 		break;
2026 	case CHIP_MULLINS:
2027 		chip_name = "MULLINS";
2028 		new_chip_name = "mullins";
2029 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2030 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2031 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2032 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2033 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2034 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2035 		num_fw = 6;
2036 		break;
2037 	default: BUG();
2038 	}
2039 
2040 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2041 
2042 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
2043 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2044 	if (err) {
2045 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
2046 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2047 		if (err)
2048 			goto out;
2049 		if (rdev->pfp_fw->datasize != pfp_req_size) {
2050 			printk(KERN_ERR
2051 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2052 			       rdev->pfp_fw->datasize, fw_name);
2053 			err = -EINVAL;
2054 			goto out;
2055 		}
2056 	} else {
2057 		err = radeon_ucode_validate(rdev->pfp_fw);
2058 		if (err) {
2059 			printk(KERN_ERR
2060 			       "cik_fw: validation failed for firmware \"%s\"\n",
2061 			       fw_name);
2062 			goto out;
2063 		} else {
2064 			new_fw++;
2065 		}
2066 	}
2067 
2068 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
2069 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2070 	if (err) {
2071 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
2072 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2073 		if (err)
2074 			goto out;
2075 		if (rdev->me_fw->datasize != me_req_size) {
2076 			printk(KERN_ERR
2077 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2078 			       rdev->me_fw->datasize, fw_name);
2079 			err = -EINVAL;
2080 		}
2081 	} else {
2082 		err = radeon_ucode_validate(rdev->me_fw);
2083 		if (err) {
2084 			printk(KERN_ERR
2085 			       "cik_fw: validation failed for firmware \"%s\"\n",
2086 			       fw_name);
2087 			goto out;
2088 		} else {
2089 			new_fw++;
2090 		}
2091 	}
2092 
2093 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
2094 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2095 	if (err) {
2096 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
2097 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2098 		if (err)
2099 			goto out;
2100 		if (rdev->ce_fw->datasize != ce_req_size) {
2101 			printk(KERN_ERR
2102 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2103 			       rdev->ce_fw->datasize, fw_name);
2104 			err = -EINVAL;
2105 		}
2106 	} else {
2107 		err = radeon_ucode_validate(rdev->ce_fw);
2108 		if (err) {
2109 			printk(KERN_ERR
2110 			       "cik_fw: validation failed for firmware \"%s\"\n",
2111 			       fw_name);
2112 			goto out;
2113 		} else {
2114 			new_fw++;
2115 		}
2116 	}
2117 
2118 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", new_chip_name);
2119 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2120 	if (err) {
2121 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
2122 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2123 		if (err)
2124 			goto out;
2125 		if (rdev->mec_fw->datasize != mec_req_size) {
2126 			printk(KERN_ERR
2127 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2128 			       rdev->mec_fw->datasize, fw_name);
2129 			err = -EINVAL;
2130 		}
2131 	} else {
2132 		err = radeon_ucode_validate(rdev->mec_fw);
2133 		if (err) {
2134 			printk(KERN_ERR
2135 			       "cik_fw: validation failed for firmware \"%s\"\n",
2136 			       fw_name);
2137 			goto out;
2138 		} else {
2139 			new_fw++;
2140 		}
2141 	}
2142 
2143 	if (rdev->family == CHIP_KAVERI) {
2144 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec2", new_chip_name);
2145 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2146 		if (err) {
2147 			goto out;
2148 		} else {
2149 			err = radeon_ucode_validate(rdev->mec2_fw);
2150 			if (err) {
2151 				goto out;
2152 			} else {
2153 				new_fw++;
2154 			}
2155 		}
2156 	}
2157 
2158 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
2159 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2160 	if (err) {
2161 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
2162 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2163 		if (err)
2164 			goto out;
2165 		if (rdev->rlc_fw->datasize != rlc_req_size) {
2166 			printk(KERN_ERR
2167 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2168 			       rdev->rlc_fw->datasize, fw_name);
2169 			err = -EINVAL;
2170 		}
2171 	} else {
2172 		err = radeon_ucode_validate(rdev->rlc_fw);
2173 		if (err) {
2174 			printk(KERN_ERR
2175 			       "cik_fw: validation failed for firmware \"%s\"\n",
2176 			       fw_name);
2177 			goto out;
2178 		} else {
2179 			new_fw++;
2180 		}
2181 	}
2182 
2183 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", new_chip_name);
2184 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2185 	if (err) {
2186 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
2187 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2188 		if (err)
2189 			goto out;
2190 		if (rdev->sdma_fw->datasize != sdma_req_size) {
2191 			printk(KERN_ERR
2192 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2193 			       rdev->sdma_fw->datasize, fw_name);
2194 			err = -EINVAL;
2195 		}
2196 	} else {
2197 		err = radeon_ucode_validate(rdev->sdma_fw);
2198 		if (err) {
2199 			printk(KERN_ERR
2200 			       "cik_fw: validation failed for firmware \"%s\"\n",
2201 			       fw_name);
2202 			goto out;
2203 		} else {
2204 			new_fw++;
2205 		}
2206 	}
2207 
2208 	/* No SMC, MC ucode on APUs */
2209 	if (!(rdev->flags & RADEON_IS_IGP)) {
2210 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
2211 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2212 		if (err) {
2213 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
2214 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2215 			if (err) {
2216 				ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
2217 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2218 				if (err)
2219 					goto out;
2220 			}
2221 			if ((rdev->mc_fw->datasize != mc_req_size) &&
2222 			    (rdev->mc_fw->datasize != mc2_req_size)){
2223 				printk(KERN_ERR
2224 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2225 				       rdev->mc_fw->datasize, fw_name);
2226 				err = -EINVAL;
2227 			}
2228 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
2229 		} else {
2230 			err = radeon_ucode_validate(rdev->mc_fw);
2231 			if (err) {
2232 				printk(KERN_ERR
2233 				       "cik_fw: validation failed for firmware \"%s\"\n",
2234 				       fw_name);
2235 				goto out;
2236 			} else {
2237 				new_fw++;
2238 			}
2239 		}
2240 
2241 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
2242 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2243 		if (err) {
2244 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
2245 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2246 			if (err) {
2247 				printk(KERN_ERR
2248 				       "smc: error loading firmware \"%s\"\n",
2249 				       fw_name);
2250 				release_firmware(rdev->smc_fw);
2251 				rdev->smc_fw = NULL;
2252 				err = 0;
2253 			} else if (rdev->smc_fw->datasize != smc_req_size) {
2254 				printk(KERN_ERR
2255 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2256 				       rdev->smc_fw->datasize, fw_name);
2257 				err = -EINVAL;
2258 			}
2259 		} else {
2260 			err = radeon_ucode_validate(rdev->smc_fw);
2261 			if (err) {
2262 				printk(KERN_ERR
2263 				       "cik_fw: validation failed for firmware \"%s\"\n",
2264 				       fw_name);
2265 				goto out;
2266 			} else {
2267 				new_fw++;
2268 			}
2269 		}
2270 	}
2271 
2272 	if (new_fw == 0) {
2273 		rdev->new_fw = false;
2274 	} else if (new_fw < num_fw) {
2275 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2276 		err = -EINVAL;
2277 	} else {
2278 		rdev->new_fw = true;
2279 	}
2280 
2281 out:
2282 	if (err) {
2283 		if (err != -EINVAL)
2284 			printk(KERN_ERR
2285 			       "cik_cp: Failed to load firmware \"%s\"\n",
2286 			       fw_name);
2287 		release_firmware(rdev->pfp_fw);
2288 		rdev->pfp_fw = NULL;
2289 		release_firmware(rdev->me_fw);
2290 		rdev->me_fw = NULL;
2291 		release_firmware(rdev->ce_fw);
2292 		rdev->ce_fw = NULL;
2293 		release_firmware(rdev->mec_fw);
2294 		rdev->mec_fw = NULL;
2295 		release_firmware(rdev->mec2_fw);
2296 		rdev->mec2_fw = NULL;
2297 		release_firmware(rdev->rlc_fw);
2298 		rdev->rlc_fw = NULL;
2299 		release_firmware(rdev->sdma_fw);
2300 		rdev->sdma_fw = NULL;
2301 		release_firmware(rdev->mc_fw);
2302 		rdev->mc_fw = NULL;
2303 		release_firmware(rdev->smc_fw);
2304 		rdev->smc_fw = NULL;
2305 	}
2306 	return err;
2307 }
2308 
2309 /**
2310  * cik_fini_microcode - drop the firmwares image references
2311  *
2312  * @rdev: radeon_device pointer
2313  *
2314  * Drop the pfp, me, mec, mec2, rlc, sdma, mc, smc and ce firmware image references.
2315  * Called at driver shutdown.
2316  */
2317 static void cik_fini_microcode(struct radeon_device *rdev)
2318 {
2319 	release_firmware(rdev->pfp_fw);
2320 	rdev->pfp_fw = NULL;
2321 	release_firmware(rdev->me_fw);
2322 	rdev->me_fw = NULL;
2323 	release_firmware(rdev->ce_fw);
2324 	rdev->ce_fw = NULL;
2325 	release_firmware(rdev->mec_fw);
2326 	rdev->mec_fw = NULL;
2327 	release_firmware(rdev->mec2_fw);
2328 	rdev->mec2_fw = NULL;
2329 	release_firmware(rdev->rlc_fw);
2330 	rdev->rlc_fw = NULL;
2331 	release_firmware(rdev->sdma_fw);
2332 	rdev->sdma_fw = NULL;
2333 	release_firmware(rdev->mc_fw);
2334 	rdev->mc_fw = NULL;
2335 	release_firmware(rdev->smc_fw);
2336 	rdev->smc_fw = NULL;
2337 }
2338 
2339 /*
2340  * Core functions
2341  */
2342 /**
2343  * cik_tiling_mode_table_init - init the hw tiling table
2344  *
2345  * @rdev: radeon_device pointer
2346  *
2347  * Starting with SI, the tiling setup is done globally in a
2348  * set of 32 tiling modes.  Rather than selecting each set of
2349  * parameters per surface as on older asics, we just select
2350  * which index in the tiling table we want to use, and the
2351  * surface uses those parameters (CIK).
2352  */
2353 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2354 {
2355 	const u32 num_tile_mode_states = 32;
2356 	const u32 num_secondary_tile_mode_states = 16;
2357 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2358 	u32 num_pipe_configs;
2359 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2360 		rdev->config.cik.max_shader_engines;
2361 
2362 	switch (rdev->config.cik.mem_row_size_in_kb) {
2363 	case 1:
2364 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2365 		break;
2366 	case 2:
2367 	default:
2368 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2369 		break;
2370 	case 4:
2371 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2372 		break;
2373 	}
2374 
2375 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2376 	if (num_pipe_configs > 8)
2377 		num_pipe_configs = 16;
2378 
2379 	if (num_pipe_configs == 16) {
2380 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2381 			switch (reg_offset) {
2382 			case 0:
2383 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2384 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2385 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2387 				break;
2388 			case 1:
2389 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2393 				break;
2394 			case 2:
2395 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2396 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2397 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2399 				break;
2400 			case 3:
2401 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2403 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2405 				break;
2406 			case 4:
2407 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2408 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2409 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410 						 TILE_SPLIT(split_equal_to_row_size));
2411 				break;
2412 			case 5:
2413 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2414 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2416 				break;
2417 			case 6:
2418 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2419 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2420 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2422 				break;
2423 			case 7:
2424 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2425 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2426 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427 						 TILE_SPLIT(split_equal_to_row_size));
2428 				break;
2429 			case 8:
2430 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2431 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2432 				break;
2433 			case 9:
2434 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2435 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2436 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2437 				break;
2438 			case 10:
2439 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2440 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2441 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2442 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2443 				break;
2444 			case 11:
2445 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2446 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2447 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2448 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2449 				break;
2450 			case 12:
2451 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2452 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2453 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455 				break;
2456 			case 13:
2457 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2458 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2459 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2460 				break;
2461 			case 14:
2462 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2463 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2464 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2465 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2466 				break;
2467 			case 16:
2468 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2469 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2470 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2471 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2472 				break;
2473 			case 17:
2474 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2475 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2476 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2477 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2478 				break;
2479 			case 27:
2480 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2481 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2482 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2483 				break;
2484 			case 28:
2485 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2486 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2487 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2488 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2489 				break;
2490 			case 29:
2491 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2492 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2493 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2494 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2495 				break;
2496 			case 30:
2497 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2498 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2499 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2500 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2501 				break;
2502 			default:
2503 				gb_tile_moden = 0;
2504 				break;
2505 			}
2506 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2507 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2508 		}
2509 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2510 			switch (reg_offset) {
2511 			case 0:
2512 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2514 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2515 						 NUM_BANKS(ADDR_SURF_16_BANK));
2516 				break;
2517 			case 1:
2518 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2520 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2521 						 NUM_BANKS(ADDR_SURF_16_BANK));
2522 				break;
2523 			case 2:
2524 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2526 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2527 						 NUM_BANKS(ADDR_SURF_16_BANK));
2528 				break;
2529 			case 3:
2530 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2532 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2533 						 NUM_BANKS(ADDR_SURF_16_BANK));
2534 				break;
2535 			case 4:
2536 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2538 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2539 						 NUM_BANKS(ADDR_SURF_8_BANK));
2540 				break;
2541 			case 5:
2542 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2543 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2544 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2545 						 NUM_BANKS(ADDR_SURF_4_BANK));
2546 				break;
2547 			case 6:
2548 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2550 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2551 						 NUM_BANKS(ADDR_SURF_2_BANK));
2552 				break;
2553 			case 8:
2554 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2556 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2557 						 NUM_BANKS(ADDR_SURF_16_BANK));
2558 				break;
2559 			case 9:
2560 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2562 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2563 						 NUM_BANKS(ADDR_SURF_16_BANK));
2564 				break;
2565 			case 10:
2566 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2568 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2569 						 NUM_BANKS(ADDR_SURF_16_BANK));
2570 				break;
2571 			case 11:
2572 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2573 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2574 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2575 						 NUM_BANKS(ADDR_SURF_8_BANK));
2576 				break;
2577 			case 12:
2578 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2580 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2581 						 NUM_BANKS(ADDR_SURF_4_BANK));
2582 				break;
2583 			case 13:
2584 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2585 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2586 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2587 						 NUM_BANKS(ADDR_SURF_2_BANK));
2588 				break;
2589 			case 14:
2590 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2591 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2592 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2593 						 NUM_BANKS(ADDR_SURF_2_BANK));
2594 				break;
2595 			default:
2596 				gb_tile_moden = 0;
2597 				break;
2598 			}
2599 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2600 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2601 		}
2602 	} else if (num_pipe_configs == 8) {
2603 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2604 			switch (reg_offset) {
2605 			case 0:
2606 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2607 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2608 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2610 				break;
2611 			case 1:
2612 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2613 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2614 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2616 				break;
2617 			case 2:
2618 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2620 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2622 				break;
2623 			case 3:
2624 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2626 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2627 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2628 				break;
2629 			case 4:
2630 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2631 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2632 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2633 						 TILE_SPLIT(split_equal_to_row_size));
2634 				break;
2635 			case 5:
2636 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2638 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2639 				break;
2640 			case 6:
2641 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2642 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2643 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2644 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2645 				break;
2646 			case 7:
2647 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2648 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2649 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2650 						 TILE_SPLIT(split_equal_to_row_size));
2651 				break;
2652 			case 8:
2653 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2654 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2655 				break;
2656 			case 9:
2657 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2658 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2659 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2660 				break;
2661 			case 10:
2662 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2663 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2664 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2665 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2666 				break;
2667 			case 11:
2668 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2669 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2670 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2671 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2672 				break;
2673 			case 12:
2674 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2675 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2676 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2677 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678 				break;
2679 			case 13:
2680 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2681 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2682 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2683 				break;
2684 			case 14:
2685 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2687 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2688 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2689 				break;
2690 			case 16:
2691 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2692 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2693 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2694 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2695 				break;
2696 			case 17:
2697 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2698 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2699 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2700 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701 				break;
2702 			case 27:
2703 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2704 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2705 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2706 				break;
2707 			case 28:
2708 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2710 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2711 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712 				break;
2713 			case 29:
2714 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2715 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2716 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2717 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2718 				break;
2719 			case 30:
2720 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2721 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2722 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2723 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2724 				break;
2725 			default:
2726 				gb_tile_moden = 0;
2727 				break;
2728 			}
2729 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2730 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2731 		}
2732 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2733 			switch (reg_offset) {
2734 			case 0:
2735 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2737 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2738 						 NUM_BANKS(ADDR_SURF_16_BANK));
2739 				break;
2740 			case 1:
2741 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2742 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2743 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2744 						 NUM_BANKS(ADDR_SURF_16_BANK));
2745 				break;
2746 			case 2:
2747 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2748 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2749 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2750 						 NUM_BANKS(ADDR_SURF_16_BANK));
2751 				break;
2752 			case 3:
2753 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2754 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2755 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2756 						 NUM_BANKS(ADDR_SURF_16_BANK));
2757 				break;
2758 			case 4:
2759 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2761 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2762 						 NUM_BANKS(ADDR_SURF_8_BANK));
2763 				break;
2764 			case 5:
2765 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2766 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2767 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2768 						 NUM_BANKS(ADDR_SURF_4_BANK));
2769 				break;
2770 			case 6:
2771 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2773 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2774 						 NUM_BANKS(ADDR_SURF_2_BANK));
2775 				break;
2776 			case 8:
2777 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2778 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2779 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2780 						 NUM_BANKS(ADDR_SURF_16_BANK));
2781 				break;
2782 			case 9:
2783 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2784 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2785 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2786 						 NUM_BANKS(ADDR_SURF_16_BANK));
2787 				break;
2788 			case 10:
2789 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2791 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2792 						 NUM_BANKS(ADDR_SURF_16_BANK));
2793 				break;
2794 			case 11:
2795 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2796 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2797 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2798 						 NUM_BANKS(ADDR_SURF_16_BANK));
2799 				break;
2800 			case 12:
2801 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2802 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2803 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2804 						 NUM_BANKS(ADDR_SURF_8_BANK));
2805 				break;
2806 			case 13:
2807 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2809 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2810 						 NUM_BANKS(ADDR_SURF_4_BANK));
2811 				break;
2812 			case 14:
2813 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2815 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2816 						 NUM_BANKS(ADDR_SURF_2_BANK));
2817 				break;
2818 			default:
2819 				gb_tile_moden = 0;
2820 				break;
2821 			}
2822 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2823 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2824 		}
2825 	} else if (num_pipe_configs == 4) {
2826 		if (num_rbs == 4) {
2827 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2828 				switch (reg_offset) {
2829 				case 0:
2830 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2831 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2832 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2834 					break;
2835 				case 1:
2836 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2837 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2838 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2840 					break;
2841 				case 2:
2842 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2843 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2844 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2845 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2846 					break;
2847 				case 3:
2848 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2849 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2850 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2851 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2852 					break;
2853 				case 4:
2854 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2855 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2856 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2857 							 TILE_SPLIT(split_equal_to_row_size));
2858 					break;
2859 				case 5:
2860 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2861 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2862 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2863 					break;
2864 				case 6:
2865 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2866 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2867 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2868 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2869 					break;
2870 				case 7:
2871 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2872 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2873 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874 							 TILE_SPLIT(split_equal_to_row_size));
2875 					break;
2876 				case 8:
2877 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2878 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2879 					break;
2880 				case 9:
2881 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2882 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2883 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2884 					break;
2885 				case 10:
2886 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2887 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2888 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2889 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2890 					break;
2891 				case 11:
2892 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2893 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2894 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2895 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2896 					break;
2897 				case 12:
2898 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2899 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2900 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2901 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2902 					break;
2903 				case 13:
2904 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2905 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2906 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2907 					break;
2908 				case 14:
2909 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2911 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2912 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2913 					break;
2914 				case 16:
2915 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2916 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2917 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2918 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2919 					break;
2920 				case 17:
2921 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2922 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2923 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2924 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925 					break;
2926 				case 27:
2927 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2928 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2929 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2930 					break;
2931 				case 28:
2932 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2933 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2934 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2935 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936 					break;
2937 				case 29:
2938 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2939 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2940 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2941 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942 					break;
2943 				case 30:
2944 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2945 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2946 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2947 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2948 					break;
2949 				default:
2950 					gb_tile_moden = 0;
2951 					break;
2952 				}
2953 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2954 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2955 			}
2956 		} else if (num_rbs < 4) {
2957 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2958 				switch (reg_offset) {
2959 				case 0:
2960 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2961 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2962 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2963 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2964 					break;
2965 				case 1:
2966 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2967 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2968 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2969 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2970 					break;
2971 				case 2:
2972 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2973 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2974 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2975 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2976 					break;
2977 				case 3:
2978 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2979 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2980 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2981 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2982 					break;
2983 				case 4:
2984 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2985 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2986 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2987 							 TILE_SPLIT(split_equal_to_row_size));
2988 					break;
2989 				case 5:
2990 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2991 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2992 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2993 					break;
2994 				case 6:
2995 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2996 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2997 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2998 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2999 					break;
3000 				case 7:
3001 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3002 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3003 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3004 							 TILE_SPLIT(split_equal_to_row_size));
3005 					break;
3006 				case 8:
3007 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3008 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
3009 					break;
3010 				case 9:
3011 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3012 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3013 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
3014 					break;
3015 				case 10:
3016 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3017 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3018 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3019 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3020 					break;
3021 				case 11:
3022 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3023 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3024 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3025 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3026 					break;
3027 				case 12:
3028 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3029 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3030 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3031 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3032 					break;
3033 				case 13:
3034 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3035 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3036 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3037 					break;
3038 				case 14:
3039 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3040 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3041 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3042 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3043 					break;
3044 				case 16:
3045 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3046 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3047 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3048 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3049 					break;
3050 				case 17:
3051 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3052 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3053 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3054 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3055 					break;
3056 				case 27:
3057 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3058 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3059 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3060 					break;
3061 				case 28:
3062 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3063 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3064 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3065 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3066 					break;
3067 				case 29:
3068 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3069 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3070 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3071 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3072 					break;
3073 				case 30:
3074 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3075 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3076 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3077 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3078 					break;
3079 				default:
3080 					gb_tile_moden = 0;
3081 					break;
3082 				}
3083 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3084 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3085 			}
3086 		}
3087 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3088 			switch (reg_offset) {
3089 			case 0:
3090 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3091 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3092 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3093 						 NUM_BANKS(ADDR_SURF_16_BANK));
3094 				break;
3095 			case 1:
3096 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3097 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3098 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3099 						 NUM_BANKS(ADDR_SURF_16_BANK));
3100 				break;
3101 			case 2:
3102 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3103 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3104 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3105 						 NUM_BANKS(ADDR_SURF_16_BANK));
3106 				break;
3107 			case 3:
3108 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3109 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3110 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3111 						 NUM_BANKS(ADDR_SURF_16_BANK));
3112 				break;
3113 			case 4:
3114 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3115 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3116 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3117 						 NUM_BANKS(ADDR_SURF_16_BANK));
3118 				break;
3119 			case 5:
3120 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3121 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3122 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3123 						 NUM_BANKS(ADDR_SURF_8_BANK));
3124 				break;
3125 			case 6:
3126 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3127 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3128 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3129 						 NUM_BANKS(ADDR_SURF_4_BANK));
3130 				break;
3131 			case 8:
3132 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3133 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3134 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3135 						 NUM_BANKS(ADDR_SURF_16_BANK));
3136 				break;
3137 			case 9:
3138 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3139 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3140 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3141 						 NUM_BANKS(ADDR_SURF_16_BANK));
3142 				break;
3143 			case 10:
3144 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3145 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3146 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3147 						 NUM_BANKS(ADDR_SURF_16_BANK));
3148 				break;
3149 			case 11:
3150 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3151 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3152 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3153 						 NUM_BANKS(ADDR_SURF_16_BANK));
3154 				break;
3155 			case 12:
3156 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3157 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3158 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3159 						 NUM_BANKS(ADDR_SURF_16_BANK));
3160 				break;
3161 			case 13:
3162 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3163 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3164 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3165 						 NUM_BANKS(ADDR_SURF_8_BANK));
3166 				break;
3167 			case 14:
3168 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3169 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3170 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3171 						 NUM_BANKS(ADDR_SURF_4_BANK));
3172 				break;
3173 			default:
3174 				gb_tile_moden = 0;
3175 				break;
3176 			}
3177 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3178 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3179 		}
3180 	} else if (num_pipe_configs == 2) {
3181 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3182 			switch (reg_offset) {
3183 			case 0:
3184 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3185 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3186 						 PIPE_CONFIG(ADDR_SURF_P2) |
3187 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3188 				break;
3189 			case 1:
3190 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3191 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3192 						 PIPE_CONFIG(ADDR_SURF_P2) |
3193 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3194 				break;
3195 			case 2:
3196 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3197 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3198 						 PIPE_CONFIG(ADDR_SURF_P2) |
3199 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3200 				break;
3201 			case 3:
3202 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3203 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3204 						 PIPE_CONFIG(ADDR_SURF_P2) |
3205 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3206 				break;
3207 			case 4:
3208 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3209 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3210 						 PIPE_CONFIG(ADDR_SURF_P2) |
3211 						 TILE_SPLIT(split_equal_to_row_size));
3212 				break;
3213 			case 5:
3214 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3215 						 PIPE_CONFIG(ADDR_SURF_P2) |
3216 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3217 				break;
3218 			case 6:
3219 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3220 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3221 						 PIPE_CONFIG(ADDR_SURF_P2) |
3222 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3223 				break;
3224 			case 7:
3225 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3226 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3227 						 PIPE_CONFIG(ADDR_SURF_P2) |
3228 						 TILE_SPLIT(split_equal_to_row_size));
3229 				break;
3230 			case 8:
3231 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3232 						PIPE_CONFIG(ADDR_SURF_P2);
3233 				break;
3234 			case 9:
3235 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3236 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3237 						 PIPE_CONFIG(ADDR_SURF_P2));
3238 				break;
3239 			case 10:
3240 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3241 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3242 						 PIPE_CONFIG(ADDR_SURF_P2) |
3243 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3244 				break;
3245 			case 11:
3246 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3247 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3248 						 PIPE_CONFIG(ADDR_SURF_P2) |
3249 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3250 				break;
3251 			case 12:
3252 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3253 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3254 						 PIPE_CONFIG(ADDR_SURF_P2) |
3255 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3256 				break;
3257 			case 13:
3258 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3259 						 PIPE_CONFIG(ADDR_SURF_P2) |
3260 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3261 				break;
3262 			case 14:
3263 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3264 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3265 						 PIPE_CONFIG(ADDR_SURF_P2) |
3266 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3267 				break;
3268 			case 16:
3269 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3270 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3271 						 PIPE_CONFIG(ADDR_SURF_P2) |
3272 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3273 				break;
3274 			case 17:
3275 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3276 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3277 						 PIPE_CONFIG(ADDR_SURF_P2) |
3278 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3279 				break;
3280 			case 27:
3281 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3282 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3283 						 PIPE_CONFIG(ADDR_SURF_P2));
3284 				break;
3285 			case 28:
3286 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3287 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3288 						 PIPE_CONFIG(ADDR_SURF_P2) |
3289 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3290 				break;
3291 			case 29:
3292 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3293 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3294 						 PIPE_CONFIG(ADDR_SURF_P2) |
3295 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3296 				break;
3297 			case 30:
3298 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3299 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3300 						 PIPE_CONFIG(ADDR_SURF_P2) |
3301 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3302 				break;
3303 			default:
3304 				gb_tile_moden = 0;
3305 				break;
3306 			}
3307 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3308 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3309 		}
3310 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3311 			switch (reg_offset) {
3312 			case 0:
3313 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3314 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3315 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3316 						 NUM_BANKS(ADDR_SURF_16_BANK));
3317 				break;
3318 			case 1:
3319 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3320 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3321 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3322 						 NUM_BANKS(ADDR_SURF_16_BANK));
3323 				break;
3324 			case 2:
3325 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3326 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3327 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3328 						 NUM_BANKS(ADDR_SURF_16_BANK));
3329 				break;
3330 			case 3:
3331 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3332 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3333 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3334 						 NUM_BANKS(ADDR_SURF_16_BANK));
3335 				break;
3336 			case 4:
3337 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3338 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3339 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3340 						 NUM_BANKS(ADDR_SURF_16_BANK));
3341 				break;
3342 			case 5:
3343 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3344 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3345 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3346 						 NUM_BANKS(ADDR_SURF_16_BANK));
3347 				break;
3348 			case 6:
3349 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3350 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3351 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3352 						 NUM_BANKS(ADDR_SURF_8_BANK));
3353 				break;
3354 			case 8:
3355 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3356 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3357 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3358 						 NUM_BANKS(ADDR_SURF_16_BANK));
3359 				break;
3360 			case 9:
3361 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3362 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3363 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3364 						 NUM_BANKS(ADDR_SURF_16_BANK));
3365 				break;
3366 			case 10:
3367 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3368 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3369 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3370 						 NUM_BANKS(ADDR_SURF_16_BANK));
3371 				break;
3372 			case 11:
3373 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3374 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3375 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3376 						 NUM_BANKS(ADDR_SURF_16_BANK));
3377 				break;
3378 			case 12:
3379 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3380 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3381 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3382 						 NUM_BANKS(ADDR_SURF_16_BANK));
3383 				break;
3384 			case 13:
3385 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3386 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3387 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388 						 NUM_BANKS(ADDR_SURF_16_BANK));
3389 				break;
3390 			case 14:
3391 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3392 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3393 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3394 						 NUM_BANKS(ADDR_SURF_8_BANK));
3395 				break;
3396 			default:
3397 				gb_tile_moden = 0;
3398 				break;
3399 			}
3400 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3401 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3402 		}
3403 	} else
3404 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3405 }
3406 
3407 /**
3408  * cik_select_se_sh - select which SE, SH to address
3409  *
3410  * @rdev: radeon_device pointer
3411  * @se_num: shader engine to address
3412  * @sh_num: sh block to address
3413  *
3414  * Select which SE, SH combinations to address. Certain
3415  * registers are instanced per SE or SH.  0xffffffff means
3416  * broadcast to all SEs or SHs (CIK).
3417  */
3418 static void cik_select_se_sh(struct radeon_device *rdev,
3419 			     u32 se_num, u32 sh_num)
3420 {
3421 	u32 data = INSTANCE_BROADCAST_WRITES;
3422 
3423 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3424 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3425 	else if (se_num == 0xffffffff)
3426 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3427 	else if (sh_num == 0xffffffff)
3428 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3429 	else
3430 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3431 	WREG32(GRBM_GFX_INDEX, data);
3432 }
3433 
3434 /**
3435  * cik_create_bitmask - create a bitmask
3436  *
3437  * @bit_width: length of the mask
3438  *
3439  * create a variable length bit mask (CIK).
3440  * Returns the bitmask.
3441  */
3442 static u32 cik_create_bitmask(u32 bit_width)
3443 {
3444 	u32 i, mask = 0;
3445 
3446 	for (i = 0; i < bit_width; i++) {
3447 		mask <<= 1;
3448 		mask |= 1;
3449 	}
3450 	return mask;
3451 }
3452 
/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
3464 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3465 			      u32 max_rb_num_per_se,
3466 			      u32 sh_per_se)
3467 {
3468 	u32 data, mask;
3469 
3470 	data = RREG32(CC_RB_BACKEND_DISABLE);
3471 	if (data & 1)
3472 		data &= BACKEND_DISABLE_MASK;
3473 	else
3474 		data = 0;
3475 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3476 
3477 	data >>= BACKEND_DISABLE_SHIFT;
3478 
3479 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3480 
3481 	return data & mask;
3482 }
3483 
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Pass 1: collect each SE/SH instance's disabled-RB bits into one
	 * packed bitmask.  GRBM_GFX_INDEX steering is shared state, so the
	 * whole walk happens under grbm_idx_mutex. */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			/* Hawaii uses a wider per-SH field in the packed mask
			 * than the other CIK parts. */
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* Restore broadcast addressing before dropping the lock. */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* Invert the disabled mask over all RBs to get the enabled mask. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Exported for user-space queries (tile config ioctl). */
	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Pass 2: program PA_SC_RASTER_CONFIG per SE, consuming enabled_rbs
	 * two bits at a time (one RB pair per SH step). */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* Neither RB of this pair is enabled. */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				/* Only the first RB of the pair is enabled. */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* Only the second RB of the pair is enabled. */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* Both RBs enabled. */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);
}
3556 
3557 /**
3558  * cik_gpu_init - setup the 3D engine
3559  *
3560  * @rdev: radeon_device pointer
3561  *
3562  * Configures the 3D engine and tiling configuration
3563  * registers so that the 3D engine is usable.
3564  */
3565 static void cik_gpu_init(struct radeon_device *rdev)
3566 {
3567 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3568 	u32 mc_shared_chmap, mc_arb_ramcfg;
3569 	u32 hdp_host_path_cntl;
3570 	u32 tmp;
3571 	int i, j;
3572 
3573 	switch (rdev->family) {
3574 	case CHIP_BONAIRE:
3575 		rdev->config.cik.max_shader_engines = 2;
3576 		rdev->config.cik.max_tile_pipes = 4;
3577 		rdev->config.cik.max_cu_per_sh = 7;
3578 		rdev->config.cik.max_sh_per_se = 1;
3579 		rdev->config.cik.max_backends_per_se = 2;
3580 		rdev->config.cik.max_texture_channel_caches = 4;
3581 		rdev->config.cik.max_gprs = 256;
3582 		rdev->config.cik.max_gs_threads = 32;
3583 		rdev->config.cik.max_hw_contexts = 8;
3584 
3585 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3586 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3587 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3588 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3589 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3590 		break;
3591 	case CHIP_HAWAII:
3592 		rdev->config.cik.max_shader_engines = 4;
3593 		rdev->config.cik.max_tile_pipes = 16;
3594 		rdev->config.cik.max_cu_per_sh = 11;
3595 		rdev->config.cik.max_sh_per_se = 1;
3596 		rdev->config.cik.max_backends_per_se = 4;
3597 		rdev->config.cik.max_texture_channel_caches = 16;
3598 		rdev->config.cik.max_gprs = 256;
3599 		rdev->config.cik.max_gs_threads = 32;
3600 		rdev->config.cik.max_hw_contexts = 8;
3601 
3602 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3603 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3604 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3605 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3606 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3607 		break;
3608 	case CHIP_KAVERI:
3609 		rdev->config.cik.max_shader_engines = 1;
3610 		rdev->config.cik.max_tile_pipes = 4;
3611 		rdev->config.cik.max_cu_per_sh = 8;
3612 		rdev->config.cik.max_backends_per_se = 2;
3613 		rdev->config.cik.max_sh_per_se = 1;
3614 		rdev->config.cik.max_texture_channel_caches = 4;
3615 		rdev->config.cik.max_gprs = 256;
3616 		rdev->config.cik.max_gs_threads = 16;
3617 		rdev->config.cik.max_hw_contexts = 8;
3618 
3619 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3620 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3621 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3622 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3623 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3624 		break;
3625 	case CHIP_KABINI:
3626 	case CHIP_MULLINS:
3627 	default:
3628 		rdev->config.cik.max_shader_engines = 1;
3629 		rdev->config.cik.max_tile_pipes = 2;
3630 		rdev->config.cik.max_cu_per_sh = 2;
3631 		rdev->config.cik.max_sh_per_se = 1;
3632 		rdev->config.cik.max_backends_per_se = 1;
3633 		rdev->config.cik.max_texture_channel_caches = 2;
3634 		rdev->config.cik.max_gprs = 256;
3635 		rdev->config.cik.max_gs_threads = 16;
3636 		rdev->config.cik.max_hw_contexts = 8;
3637 
3638 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3639 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3640 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3641 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3642 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3643 		break;
3644 	}
3645 
3646 	/* Initialize HDP */
3647 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3648 		WREG32((0x2c14 + j), 0x00000000);
3649 		WREG32((0x2c18 + j), 0x00000000);
3650 		WREG32((0x2c1c + j), 0x00000000);
3651 		WREG32((0x2c20 + j), 0x00000000);
3652 		WREG32((0x2c24 + j), 0x00000000);
3653 	}
3654 
3655 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3656 	WREG32(SRBM_INT_CNTL, 0x1);
3657 	WREG32(SRBM_INT_ACK, 0x1);
3658 
3659 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3660 
3661 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3662 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3663 
3664 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3665 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3666 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3667 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3668 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3669 		rdev->config.cik.mem_row_size_in_kb = 4;
3670 	/* XXX use MC settings? */
3671 	rdev->config.cik.shader_engine_tile_size = 32;
3672 	rdev->config.cik.num_gpus = 1;
3673 	rdev->config.cik.multi_gpu_tile_size = 64;
3674 
3675 	/* fix up row size */
3676 	gb_addr_config &= ~ROW_SIZE_MASK;
3677 	switch (rdev->config.cik.mem_row_size_in_kb) {
3678 	case 1:
3679 	default:
3680 		gb_addr_config |= ROW_SIZE(0);
3681 		break;
3682 	case 2:
3683 		gb_addr_config |= ROW_SIZE(1);
3684 		break;
3685 	case 4:
3686 		gb_addr_config |= ROW_SIZE(2);
3687 		break;
3688 	}
3689 
3690 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3691 	 * not have bank info, so create a custom tiling dword.
3692 	 * bits 3:0   num_pipes
3693 	 * bits 7:4   num_banks
3694 	 * bits 11:8  group_size
3695 	 * bits 15:12 row_size
3696 	 */
3697 	rdev->config.cik.tile_config = 0;
3698 	switch (rdev->config.cik.num_tile_pipes) {
3699 	case 1:
3700 		rdev->config.cik.tile_config |= (0 << 0);
3701 		break;
3702 	case 2:
3703 		rdev->config.cik.tile_config |= (1 << 0);
3704 		break;
3705 	case 4:
3706 		rdev->config.cik.tile_config |= (2 << 0);
3707 		break;
3708 	case 8:
3709 	default:
3710 		/* XXX what about 12? */
3711 		rdev->config.cik.tile_config |= (3 << 0);
3712 		break;
3713 	}
3714 	rdev->config.cik.tile_config |=
3715 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3716 	rdev->config.cik.tile_config |=
3717 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3718 	rdev->config.cik.tile_config |=
3719 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3720 
3721 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3722 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3723 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3724 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3725 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3726 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3727 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3728 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3729 
3730 	cik_tiling_mode_table_init(rdev);
3731 
3732 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3733 		     rdev->config.cik.max_sh_per_se,
3734 		     rdev->config.cik.max_backends_per_se);
3735 
3736 	rdev->config.cik.active_cus = 0;
3737 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3738 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3739 			rdev->config.cik.active_cus +=
3740 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3741 		}
3742 	}
3743 
3744 	/* set HW defaults for 3D engine */
3745 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3746 
3747 	mutex_lock(&rdev->grbm_idx_mutex);
3748 	/*
3749 	 * making sure that the following register writes will be broadcasted
3750 	 * to all the shaders
3751 	 */
3752 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3753 	WREG32(SX_DEBUG_1, 0x20);
3754 
3755 	WREG32(TA_CNTL_AUX, 0x00010000);
3756 
3757 	tmp = RREG32(SPI_CONFIG_CNTL);
3758 	tmp |= 0x03000000;
3759 	WREG32(SPI_CONFIG_CNTL, tmp);
3760 
3761 	WREG32(SQ_CONFIG, 1);
3762 
3763 	WREG32(DB_DEBUG, 0);
3764 
3765 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3766 	tmp |= 0x00000400;
3767 	WREG32(DB_DEBUG2, tmp);
3768 
3769 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3770 	tmp |= 0x00020200;
3771 	WREG32(DB_DEBUG3, tmp);
3772 
3773 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3774 	tmp |= 0x00018208;
3775 	WREG32(CB_HW_CONTROL, tmp);
3776 
3777 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3778 
3779 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3780 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3781 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3782 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3783 
3784 	WREG32(VGT_NUM_INSTANCES, 1);
3785 
3786 	WREG32(CP_PERFMON_CNTL, 0);
3787 
3788 	WREG32(SQ_CONFIG, 0);
3789 
3790 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3791 					  FORCE_EOV_MAX_REZ_CNT(255)));
3792 
3793 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3794 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3795 
3796 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3797 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3798 
3799 	tmp = RREG32(HDP_MISC_CNTL);
3800 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3801 	WREG32(HDP_MISC_CNTL, tmp);
3802 
3803 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3804 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3805 
3806 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3807 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3808 	mutex_unlock(&rdev->grbm_idx_mutex);
3809 
3810 	udelay(50);
3811 }
3812 
3813 /*
3814  * GPU scratch registers helpers function.
3815  */
3816 /**
3817  * cik_scratch_init - setup driver info for CP scratch regs
3818  *
3819  * @rdev: radeon_device pointer
3820  *
3821  * Set up the number and offset of the CP scratch registers.
3822  * NOTE: use of CP scratch registers is a legacy inferface and
3823  * is not used by default on newer asics (r6xx+).  On newer asics,
3824  * memory buffers are used for fences rather than scratch regs.
3825  */
3826 static void cik_scratch_init(struct radeon_device *rdev)
3827 {
3828 	int i;
3829 
3830 	rdev->scratch.num_reg = 7;
3831 	rdev->scratch.reg_base = SCRATCH_REG0;
3832 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3833 		rdev->scratch.free[i] = true;
3834 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3835 	}
3836 }
3837 
3838 /**
3839  * cik_ring_test - basic gfx ring test
3840  *
3841  * @rdev: radeon_device pointer
3842  * @ring: radeon_ring structure holding ring information
3843  *
3844  * Allocate a scratch register and write to it using the gfx ring (CIK).
3845  * Provides a basic gfx ring test to verify that the ring is working.
3846  * Used by cik_cp_gfx_resume();
3847  * Returns 0 on success, error on failure.
3848  */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* grab a free CP scratch register to use as the test target */
	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed with a known value so we can tell whether the CP overwrote it */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* emit a SET_UCONFIG_REG packet writing 0xDEADBEEF to the scratch reg */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* poll (bounded by usec_timeout) until the CP has executed the packet */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3889 
3890 /**
3891  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3892  *
3893  * @rdev: radeon_device pointer
3894  * @ridx: radeon ring index
3895  *
3896  * Emits an hdp flush on the cp.
3897  */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* Pick the GPU_HDP_FLUSH_* bit corresponding to this ring.  Note the
	 * gfx case is listed last on purpose: compute rings and any other
	 * ring fall into the per-ME/per-pipe default handling above it.
	 */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		switch (ring->me) {
		case 0:
			/* compute ME 0: per-pipe flush bit starting at CP2 */
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			/* compute ME 1: per-pipe flush bit starting at CP6 */
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			/* unknown ME - nothing sensible to emit */
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	/* write the flush request bit, then wait until the done bit matches */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, 0x20); /* poll interval */
}
3934 
3935 /**
3936  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3937  *
3938  * @rdev: radeon_device pointer
3939  * @fence: radeon fence object
3940  *
3941  * Emits a fence sequnce number on the gfx ring and flushes
3942  * GPU caches.
3943  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where the fence sequence number is written */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.  Note INT_SEL(0) on the
	 * dummy vs INT_SEL(2) on the real event below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3975 
3976 /**
3977  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3978  *
3979  * @rdev: radeon_device pointer
3980  * @fence: radeon fence object
3981  *
3982  * Emits a fence sequnce number on the compute ring and flushes
3983  * GPU caches.
3984  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where the fence sequence number is written */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int.  Compute rings use
	 * RELEASE_MEM rather than the gfx EVENT_WRITE_EOP packet;
	 * note the data-select dword precedes the address here.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
4003 
4004 /**
4005  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
4006  *
4007  * @rdev: radeon_device pointer
4008  * @ring: radeon ring buffer object
4009  * @semaphore: radeon semaphore object
4010  * @emit_wait: Is this a sempahore wait?
4011  *
4012  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4013  * from running ahead of semaphore waits.
4014  */
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	/* MEM_SEMAPHORE packet: 64-bit semaphore address plus signal/wait select */
	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, lower_32_bits(addr));
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);

	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
		/* Prevent the PFP from running ahead of the semaphore wait */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}

	/* always succeeds on CIK; return value signals semaphore support */
	return true;
}
4035 
4036 /**
4037  * cik_copy_cpdma - copy pages using the CP DMA engine
4038  *
4039  * @rdev: radeon_device pointer
4040  * @src_offset: src GPU address
4041  * @dst_offset: dst GPU address
4042  * @num_gpu_pages: number of GPU pages to xfer
4043  * @resv: reservation object to sync to
4044  *
4045  * Copy GPU paging using the CP DMA engine (CIK+).
4046  * Used by the radeon ttm implementation to move pages if
4047  * registered as the asic copy callback.
4048  */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	/* each DMA_DATA packet can move at most 0x1fffff bytes */
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per DMA_DATA packet plus 18 dwords of fixed overhead */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* serialize against fences attached to the reservation object */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		/* clamp each chunk to the DMA_DATA maximum */
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* only the final chunk gets CP_SYNC set */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
4107 
4108 /*
4109  * IB stuff
4110  */
4111 /**
4112  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4113  *
4114  * @rdev: radeon_device pointer
4115  * @ib: radeon indirect buffer object
4116  *
4117  * Emits an DE (drawing engine) or CE (constant engine) IB
4118  * on the gfx ring.  IBs are usually generated by userspace
4119  * acceleration drivers and submitted to the kernel for
4120  * sheduling on the ring.  This function schedules the IB
4121  * on the gfx ring for execution by the GPU.
4122  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	/* vmid 0 is used when the IB has no VM attached */
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		/* record where the rptr will be once this IB packet retires */
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write + 4 for the IB packet below */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for the WRITE_DATA + 4 for the IB packet below */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	control |= ib->length_dw | (vm_id << 24);

	/* IB packet: header, 64-bit GPU address, length/vmid control */
	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
4162 
4163 /**
4164  * cik_ib_test - basic gfx ring IB test
4165  *
4166  * @rdev: radeon_device pointer
4167  * @ring: radeon_ring structure holding ring information
4168  *
4169  * Allocate an IB and execute it on the gfx ring (CIK).
4170  * Provides a basic gfx ring test to verify that IBs are working.
4171  * Returns 0 on success, error on failure.
4172  */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* grab a free CP scratch register to use as the test target */
	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed with a known value so we can tell whether the IB overwrote it */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* 3-dword IB: SET_UCONFIG_REG writing 0xDEADBEEF to the scratch reg */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* wait for the IB's fence before checking the result */
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	/* the fence may signal slightly before the register write lands; poll */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
4228 
4229 /*
4230  * CP.
4231  * On CIK, gfx and compute now have independant command processors.
4232  *
4233  * GFX
4234  * Gfx consists of a single ring and can process both gfx jobs and
4235  * compute jobs.  The gfx CP consists of three microengines (ME):
4236  * PFP - Pre-Fetch Parser
4237  * ME - Micro Engine
4238  * CE - Constant Engine
4239  * The PFP and ME make up what is considered the Drawing Engine (DE).
4240  * The CE is an asynchronous engine used for updating buffer desciptors
4241  * used by the DE so that they can be loaded into cache in parallel
4242  * while the DE is processing state update packets.
4243  *
4244  * Compute
4245  * The compute CP consists of two microengines (ME):
4246  * MEC1 - Compute MicroEngine 1
4247  * MEC2 - Compute MicroEngine 2
4248  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4249  * The queues are exposed to userspace and are programmed directly
4250  * by the compute runtime.
4251  */
4252 /**
4253  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4254  *
4255  * @rdev: radeon_device pointer
4256  * @enable: enable or disable the MEs
4257  *
4258  * Halts or unhalts the gfx MEs.
4259  */
4260 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4261 {
4262 	if (enable)
4263 		WREG32(CP_ME_CNTL, 0);
4264 	else {
4265 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4266 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4267 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4268 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4269 	}
4270 	udelay(50);
4271 }
4272 
4273 /**
4274  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4275  *
4276  * @rdev: radeon_device pointer
4277  *
4278  * Loads the gfx PFP, ME, and CE ucode.
4279  * Returns 0 for success, -EINVAL if the ucode is not available.
4280  */
4281 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4282 {
4283 	int i;
4284 
4285 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4286 		return -EINVAL;
4287 
4288 	cik_cp_gfx_enable(rdev, false);
4289 
4290 	if (rdev->new_fw) {
4291 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4292 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4293 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4294 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4295 		const struct gfx_firmware_header_v1_0 *me_hdr =
4296 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4297 		const __le32 *fw_data;
4298 		u32 fw_size;
4299 
4300 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4301 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4302 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4303 
4304 		/* PFP */
4305 		fw_data = (const __le32 *)
4306 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4307 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4308 		WREG32(CP_PFP_UCODE_ADDR, 0);
4309 		for (i = 0; i < fw_size; i++)
4310 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4311 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4312 
4313 		/* CE */
4314 		fw_data = (const __le32 *)
4315 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4316 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4317 		WREG32(CP_CE_UCODE_ADDR, 0);
4318 		for (i = 0; i < fw_size; i++)
4319 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4320 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4321 
4322 		/* ME */
4323 		fw_data = (const __be32 *)
4324 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4325 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4326 		WREG32(CP_ME_RAM_WADDR, 0);
4327 		for (i = 0; i < fw_size; i++)
4328 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4329 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4330 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4331 	} else {
4332 		const __be32 *fw_data;
4333 
4334 		/* PFP */
4335 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4336 		WREG32(CP_PFP_UCODE_ADDR, 0);
4337 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4338 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4339 		WREG32(CP_PFP_UCODE_ADDR, 0);
4340 
4341 		/* CE */
4342 		fw_data = (const __be32 *)rdev->ce_fw->data;
4343 		WREG32(CP_CE_UCODE_ADDR, 0);
4344 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4345 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4346 		WREG32(CP_CE_UCODE_ADDR, 0);
4347 
4348 		/* ME */
4349 		fw_data = (const __be32 *)rdev->me_fw->data;
4350 		WREG32(CP_ME_RAM_WADDR, 0);
4351 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4352 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4353 		WREG32(CP_ME_RAM_WADDR, 0);
4354 	}
4355 
4356 	return 0;
4357 }
4358 
4359 /**
4360  * cik_cp_gfx_start - start the gfx ring
4361  *
4362  * @rdev: radeon_device pointer
4363  *
4364  * Enables the ring and loads the clear state context and other
4365  * packets required to init the ring.
4366  * Returns 0 for success, error for failure.
4367  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* reserve room for the clear-state table plus 17 fixed dwords */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the golden default-register state into the ring */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4419 
4420 /**
4421  * cik_cp_gfx_fini - stop the gfx ring
4422  *
4423  * @rdev: radeon_device pointer
4424  *
4425  * Stop the gfx ring and tear down the driver ring
4426  * info.
4427  */
4428 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4429 {
4430 	cik_cp_gfx_enable(rdev, false);
4431 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4432 }
4433 
4434 /**
4435  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4436  *
4437  * @rdev: radeon_device pointer
4438  *
4439  * Program the location and size of the gfx ring buffer
4440  * and test it to make sure it's working.
4441  * Returns 0 for success, error for failure.
4442  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	/* without writeback the CP must not push rptr updates to memory */
	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	/* rewrite CNTL with RB_RPTR_WR_ENA dropped to re-arm the ring */
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx ring is up; expose the full VRAM size for copies again */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4509 
4510 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4511 		     struct radeon_ring *ring)
4512 {
4513 	u32 rptr;
4514 
4515 	if (rdev->wb.enabled)
4516 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4517 	else
4518 		rptr = RREG32(CP_RB0_RPTR);
4519 
4520 	return rptr;
4521 }
4522 
4523 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4524 		     struct radeon_ring *ring)
4525 {
4526 	u32 wptr;
4527 
4528 	wptr = RREG32(CP_RB0_WPTR);
4529 
4530 	return wptr;
4531 }
4532 
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	(void)RREG32(CP_RB0_WPTR);	/* posting read to flush the write */
}
4539 
4540 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4541 			 struct radeon_ring *ring)
4542 {
4543 	u32 rptr;
4544 
4545 	if (rdev->wb.enabled) {
4546 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4547 	} else {
4548 		mutex_lock(&rdev->srbm_mutex);
4549 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4550 		rptr = RREG32(CP_HQD_PQ_RPTR);
4551 		cik_srbm_select(rdev, 0, 0, 0, 0);
4552 		mutex_unlock(&rdev->srbm_mutex);
4553 	}
4554 
4555 	return rptr;
4556 }
4557 
4558 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4559 			 struct radeon_ring *ring)
4560 {
4561 	u32 wptr;
4562 
4563 	if (rdev->wb.enabled) {
4564 		/* XXX check if swapping is necessary on BE */
4565 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4566 	} else {
4567 		mutex_lock(&rdev->srbm_mutex);
4568 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4569 		wptr = RREG32(CP_HQD_PQ_WPTR);
4570 		cik_srbm_select(rdev, 0, 0, 0, 0);
4571 		mutex_unlock(&rdev->srbm_mutex);
4572 	}
4573 
4574 	return wptr;
4575 }
4576 
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	/* update the writeback shadow, then ring the doorbell */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4584 
/**
 * cik_compute_stop - dequeue and disable a compute hardware queue
 *
 * @rdev: radeon_device pointer
 * @ring: compute ring whose HQD should be torn down
 *
 * Disables wptr polling and deactivates the hardware queue descriptor
 * backing @ring.  Caller must hold srbm_mutex, since this function
 * changes the SRBM queue selection via cik_srbm_select().
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		/* wait (bounded by usec_timeout) for the queue to go inactive */
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		/* reset the queue pointers */
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore the default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4609 
4610 /**
4611  * cik_cp_compute_enable - enable/disable the compute CP MEs
4612  *
4613  * @rdev: radeon_device pointer
4614  * @enable: enable or disable the MEs
4615  *
4616  * Halts or unhalts the compute MEs.
4617  */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);	/* un-halt both MEC microengines */
	else {
		/*
		 * To make hibernation reliable we need to clear compute ring
		 * configuration before halting the compute ring.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
		mutex_unlock(&rdev->srbm_mutex);

		/* Now halt both MEC microengines and mark the rings dead. */
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}
	/* give the block time to settle */
	udelay(50);
}
4638 
4639 /**
4640  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4641  *
4642  * @rdev: radeon_device pointer
4643  *
4644  * Loads the compute MEC1&2 ucode.
4645  * Returns 0 for success, -EINVAL if the ucode is not available.
4646  */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* Halt the MECs before rewriting their ucode memory. */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		/* New-style firmware images carry a structured header. */
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1: stream the image through the ucode data port. */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2: only KAVERI has a second MEC, loaded from mec2_fw. */
		if (rdev->family == CHIP_KAVERI) {
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		/* Legacy (headerless, big-endian) firmware image. */
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2: legacy images reuse the MEC1 blob for MEC2. */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4709 
4710 /**
4711  * cik_cp_compute_start - start the compute queues
4712  *
4713  * @rdev: radeon_device pointer
4714  *
4715  * Enable the compute queues.
4716  * Returns 0 for success, error for failure.
4717  */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	/* Un-halt the MECs; the individual queues are programmed later in
	 * cik_cp_compute_resume().
	 */
	cik_cp_compute_enable(rdev, true);

	return 0;
}
4724 
4725 /**
4726  * cik_cp_compute_fini - stop the compute queues
4727  *
4728  * @rdev: radeon_device pointer
4729  *
4730  * Stop the compute queues and tear down the driver queue
4731  * info.
4732  */
4733 static void cik_cp_compute_fini(struct radeon_device *rdev)
4734 {
4735 	int i, idx, r;
4736 
4737 	cik_cp_compute_enable(rdev, false);
4738 
4739 	for (i = 0; i < 2; i++) {
4740 		if (i == 0)
4741 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4742 		else
4743 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4744 
4745 		if (rdev->ring[idx].mqd_obj) {
4746 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4747 			if (unlikely(r != 0))
4748 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4749 
4750 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4751 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4752 
4753 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4754 			rdev->ring[idx].mqd_obj = NULL;
4755 		}
4756 	}
4757 }
4758 
4759 static void cik_mec_fini(struct radeon_device *rdev)
4760 {
4761 	int r;
4762 
4763 	if (rdev->mec.hpd_eop_obj) {
4764 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4765 		if (unlikely(r != 0))
4766 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4767 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4768 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4769 
4770 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4771 		rdev->mec.hpd_eop_obj = NULL;
4772 	}
4773 }
4774 
4775 #define MEC_HPD_SIZE 2048
4776 
4777 static int cik_mec_init(struct radeon_device *rdev)
4778 {
4779 	int r;
4780 	u32 *hpd;
4781 
4782 	/*
4783 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4784 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4785 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4786 	 * be handled by KFD
4787 	 */
4788 	rdev->mec.num_mec = 1;
4789 	rdev->mec.num_pipe = 1;
4790 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4791 
4792 	if (rdev->mec.hpd_eop_obj == NULL) {
4793 		r = radeon_bo_create(rdev,
4794 				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4795 				     PAGE_SIZE, true,
4796 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4797 				     &rdev->mec.hpd_eop_obj);
4798 		if (r) {
4799 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4800 			return r;
4801 		}
4802 	}
4803 
4804 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4805 	if (unlikely(r != 0)) {
4806 		cik_mec_fini(rdev);
4807 		return r;
4808 	}
4809 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4810 			  &rdev->mec.hpd_eop_gpu_addr);
4811 	if (r) {
4812 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4813 		cik_mec_fini(rdev);
4814 		return r;
4815 	}
4816 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4817 	if (r) {
4818 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4819 		cik_mec_fini(rdev);
4820 		return r;
4821 	}
4822 
4823 	/* clear memory.  Not sure if this is required or not */
4824 	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4825 
4826 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4827 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4828 
4829 	return 0;
4830 }
4831 
/*
 * Per-queue CP_MQD_* / CP_HQD_* register state as stored in the
 * queue_state portion of struct bonaire_mqd below.  The MQD buffer this
 * lives in is handed to the hardware via CP_MQD_BASE_ADDR (see
 * cik_cp_compute_resume()), so the field layout is GPU-visible and must
 * not be reordered.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4870 
/*
 * Memory Queue Descriptor (MQD) for a CIK compute queue.  One of these
 * is allocated per compute ring in GPU-visible (GTT) memory and
 * initialized by cik_cp_compute_resume(); the hardware reads it via
 * CP_MQD_BASE_ADDR.  Layout is fixed by the MEC firmware — do not
 * reorder fields.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	/* snapshot of the live HQD register state, see struct hqd_registers */
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4898 
4899 /**
4900  * cik_cp_compute_resume - setup the compute queue registers
4901  *
4902  * @rdev: radeon_device pointer
4903  *
4904  * Program the compute queues and test them to make sure they
4905  * are working.
4906  * Returns 0 for success, error for failure.
4907  */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	/* un-halt the MECs first */
	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;

	cik_srbm_select(rdev, 0, 0, 0, 0);

	/* write the EOP addr */
	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(CP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(CP_HPD_EOP_CONTROL);
	tmp &= ~EOP_SIZE_MASK;
	tmp |= order_base_2(MEC_HPD_SIZE / 8);
	WREG32(CP_HPD_EOP_CONTROL, tmp);

	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* allocate the per-queue MQD buffer on first resume */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		/* NOTE(review): MQD header magic, presumably a format/version
		 * tag consumed by the MEC firmware — confirm against MEC docs.
		 */
		mqd->header = 0xC0310800;
		/* enable all CUs for this queue */
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* bank the HQD registers onto this queue */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* wait (bounded) for the HQD to drain */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* smoke-test the queue before declaring it usable */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
5139 
/**
 * cik_cp_enable - enable/disable both the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the command processors
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
5145 
/* Load the gfx CP ucode, then the compute (MEC) ucode.
 * Returns 0 on success or the first error encountered.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r == 0)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
5159 
/**
 * cik_cp_fini - tear down both the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
5165 
5166 static int cik_cp_resume(struct radeon_device *rdev)
5167 {
5168 	int r;
5169 
5170 	cik_enable_gui_idle_interrupt(rdev, false);
5171 
5172 	r = cik_cp_load_microcode(rdev);
5173 	if (r)
5174 		return r;
5175 
5176 	r = cik_cp_gfx_resume(rdev);
5177 	if (r)
5178 		return r;
5179 	r = cik_cp_compute_resume(rdev);
5180 	if (r)
5181 		return r;
5182 
5183 	cik_enable_gui_idle_interrupt(rdev, true);
5184 
5185 	return 0;
5186 }
5187 
/* Dump the GRBM/SRBM/SDMA/CP status registers; used by the reset code
 * to aid debugging of GPU hangs.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
5227 
5228 /**
5229  * cik_gpu_check_soft_reset - check which blocks are busy
5230  *
5231  * @rdev: radeon_device pointer
5232  *
5233  * Check which blocks are busy and return the relevant reset
5234  * mask to be used by cik_gpu_soft_reset().
5235  * Returns a mask of the blocks to be reset.
5236  */
5237 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5238 {
5239 	u32 reset_mask = 0;
5240 	u32 tmp;
5241 
5242 	/* GRBM_STATUS */
5243 	tmp = RREG32(GRBM_STATUS);
5244 	if (tmp & (PA_BUSY | SC_BUSY |
5245 		   BCI_BUSY | SX_BUSY |
5246 		   TA_BUSY | VGT_BUSY |
5247 		   DB_BUSY | CB_BUSY |
5248 		   GDS_BUSY | SPI_BUSY |
5249 		   IA_BUSY | IA_BUSY_NO_DMA))
5250 		reset_mask |= RADEON_RESET_GFX;
5251 
5252 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5253 		reset_mask |= RADEON_RESET_CP;
5254 
5255 	/* GRBM_STATUS2 */
5256 	tmp = RREG32(GRBM_STATUS2);
5257 	if (tmp & RLC_BUSY)
5258 		reset_mask |= RADEON_RESET_RLC;
5259 
5260 	/* SDMA0_STATUS_REG */
5261 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5262 	if (!(tmp & SDMA_IDLE))
5263 		reset_mask |= RADEON_RESET_DMA;
5264 
5265 	/* SDMA1_STATUS_REG */
5266 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5267 	if (!(tmp & SDMA_IDLE))
5268 		reset_mask |= RADEON_RESET_DMA1;
5269 
5270 	/* SRBM_STATUS2 */
5271 	tmp = RREG32(SRBM_STATUS2);
5272 	if (tmp & SDMA_BUSY)
5273 		reset_mask |= RADEON_RESET_DMA;
5274 
5275 	if (tmp & SDMA1_BUSY)
5276 		reset_mask |= RADEON_RESET_DMA1;
5277 
5278 	/* SRBM_STATUS */
5279 	tmp = RREG32(SRBM_STATUS);
5280 
5281 	if (tmp & IH_BUSY)
5282 		reset_mask |= RADEON_RESET_IH;
5283 
5284 	if (tmp & SEM_BUSY)
5285 		reset_mask |= RADEON_RESET_SEM;
5286 
5287 	if (tmp & GRBM_RQ_PENDING)
5288 		reset_mask |= RADEON_RESET_GRBM;
5289 
5290 	if (tmp & VMC_BUSY)
5291 		reset_mask |= RADEON_RESET_VMC;
5292 
5293 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5294 		   MCC_BUSY | MCD_BUSY))
5295 		reset_mask |= RADEON_RESET_MC;
5296 
5297 	if (evergreen_is_display_hung(rdev))
5298 		reset_mask |= RADEON_RESET_DISPLAY;
5299 
5300 	/* Skip MC reset as it's mostly likely not hung, just busy */
5301 	if (reset_mask & RADEON_RESET_MC) {
5302 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5303 		reset_mask &= ~RADEON_RESET_MC;
5304 	}
5305 
5306 	return reset_mask;
5307 }
5308 
5309 /**
5310  * cik_gpu_soft_reset - soft reset GPU
5311  *
5312  * @rdev: radeon_device pointer
5313  * @reset_mask: mask of which blocks to reset
5314  *
5315  * Soft reset the blocks specified in @reset_mask.
5316  */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump state before the reset for debugging */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* stop memory traffic before asserting any reset bits */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* only discrete parts get an MC reset; IGPs share system memory */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* assert, hold briefly, then de-assert the GRBM reset bits;
	 * the read-back after each write posts the access
	 */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* same dance for the SRBM reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	/* dump state again so before/after can be compared */
	cik_print_gpu_status_regs(rdev);
}
5439 
/* GMCON registers saved by kv_save_regs_for_reset() and restored by
 * kv_restore_regs_for_reset() around a pci config reset on KV/KB APUs.
 */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5445 
/* Snapshot the GMCON registers and keep the render engine from
 * executing on power-up / register update while the asic is reset.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	/* save the registers we are about to modify */
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	/* mask off the auto-execute and stutter bits for the duration */
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5457 
5458 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5459 				      struct kv_reset_save_regs *save)
5460 {
5461 	int i;
5462 
5463 	WREG32(GMCON_PGFSM_WRITE, 0);
5464 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5465 
5466 	for (i = 0; i < 5; i++)
5467 		WREG32(GMCON_PGFSM_WRITE, 0);
5468 
5469 	WREG32(GMCON_PGFSM_WRITE, 0);
5470 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5471 
5472 	for (i = 0; i < 5; i++)
5473 		WREG32(GMCON_PGFSM_WRITE, 0);
5474 
5475 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5476 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5477 
5478 	for (i = 0; i < 5; i++)
5479 		WREG32(GMCON_PGFSM_WRITE, 0);
5480 
5481 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5482 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5483 
5484 	for (i = 0; i < 5; i++)
5485 		WREG32(GMCON_PGFSM_WRITE, 0);
5486 
5487 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5488 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5489 
5490 	for (i = 0; i < 5; i++)
5491 		WREG32(GMCON_PGFSM_WRITE, 0);
5492 
5493 	WREG32(GMCON_PGFSM_WRITE, 0);
5494 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5495 
5496 	for (i = 0; i < 5; i++)
5497 		WREG32(GMCON_PGFSM_WRITE, 0);
5498 
5499 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5500 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5501 
5502 	for (i = 0; i < 5; i++)
5503 		WREG32(GMCON_PGFSM_WRITE, 0);
5504 
5505 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5506 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5507 
5508 	for (i = 0; i < 5; i++)
5509 		WREG32(GMCON_PGFSM_WRITE, 0);
5510 
5511 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5512 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5513 
5514 	for (i = 0; i < 5; i++)
5515 		WREG32(GMCON_PGFSM_WRITE, 0);
5516 
5517 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5518 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5519 
5520 	for (i = 0; i < 5; i++)
5521 		WREG32(GMCON_PGFSM_WRITE, 0);
5522 
5523 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5524 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5525 
5526 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5527 	WREG32(GMCON_MISC, save->gmcon_misc);
5528 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5529 }
5530 
/**
 * cik_gpu_pci_config_reset - reset the asic via pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Halts every engine (CP, MEC, SDMA, RLC), quiesces memory traffic,
 * then resets the whole asic through PCI config space and waits for it
 * to come back (CONFIG_MEMSIZE readable again).  On IGPs the GMCON
 * state is saved/restored around the reset.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGPs: preserve GMCON state across the reset */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5593 
5594 /**
5595  * cik_asic_reset - soft reset GPU
5596  *
5597  * @rdev: radeon_device pointer
5598  *
5599  * Look up which blocks are hung and attempt
5600  * to reset them.
5601  * Returns 0 for success.
5602  */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* tell the BIOS (via scratch regs) that the engines are hung */
	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	cik_gpu_soft_reset(rdev, reset_mask);

	/* re-check: did the soft reset clear everything? */
	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* everything idle again: clear the hung flag */
	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
5628 
5629 /**
5630  * cik_gfx_is_lockup - check if the 3D engine is locked up
5631  *
5632  * @rdev: radeon_device pointer
5633  * @ring: radeon_ring structure holding ring information
5634  *
5635  * Check if the 3D engine is locked up (CIK).
5636  * Returns true if the engine is locked, false if not.
5637  */
5638 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5639 {
5640 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5641 
5642 	if (!(reset_mask & (RADEON_RESET_GFX |
5643 			    RADEON_RESET_COMPUTE |
5644 			    RADEON_RESET_CP))) {
5645 		radeon_ring_lockup_update(rdev, ring);
5646 		return false;
5647 	}
5648 	return radeon_ring_test_lockup(rdev, ring);
5649 }
5650 
/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop display/MC clients before reprogramming the apertures */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in bits 31:16, start in bits 15:0 (16MB units) */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP aperture unused: base 0, top == bot */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5707 
5708 /**
5709  * cik_mc_init - initialize the memory controller driver params
5710  *
5711  * @rdev: radeon_device pointer
5712  *
5713  * Look up the amount of vram, vram width, and decide how to place
5714  * vram and gart within the GPU's physical address space (CIK).
5715  * Returns 0 for success.
5716  */
5717 static int cik_mc_init(struct radeon_device *rdev)
5718 {
5719 	u32 tmp;
5720 	int chansize, numchan;
5721 
5722 	/* Get VRAM informations */
5723 	rdev->mc.vram_is_ddr = true;
5724 	tmp = RREG32(MC_ARB_RAMCFG);
5725 	if (tmp & CHANSIZE_MASK) {
5726 		chansize = 64;
5727 	} else {
5728 		chansize = 32;
5729 	}
5730 	tmp = RREG32(MC_SHARED_CHMAP);
5731 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5732 	case 0:
5733 	default:
5734 		numchan = 1;
5735 		break;
5736 	case 1:
5737 		numchan = 2;
5738 		break;
5739 	case 2:
5740 		numchan = 4;
5741 		break;
5742 	case 3:
5743 		numchan = 8;
5744 		break;
5745 	case 4:
5746 		numchan = 3;
5747 		break;
5748 	case 5:
5749 		numchan = 6;
5750 		break;
5751 	case 6:
5752 		numchan = 10;
5753 		break;
5754 	case 7:
5755 		numchan = 12;
5756 		break;
5757 	case 8:
5758 		numchan = 16;
5759 		break;
5760 	}
5761 	rdev->mc.vram_width = numchan * chansize;
5762 	/* Could aper size report 0 ? */
5763 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5764 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5765 	/* size in MB on si */
5766 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5767 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5768 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5769 	si_vram_gtt_location(rdev, &rdev->mc);
5770 	radeon_update_bandwidth_info(rdev);
5771 
5772 	return 0;
5773 }
5774 
5775 /*
5776  * GART
5777  * VMID 0 is the physical GPU addresses as used by the kernel.
5778  * VMIDs 1-15 are used for userspace clients and are handled
5779  * by the radeon vm/hsa code.
5780  */
5781 /**
5782  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5783  *
5784  * @rdev: radeon_device pointer
5785  *
5786  * Flush the TLB for the VMID 0 page table (CIK).
5787  */
5788 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5789 {
5790 	/* flush hdp cache */
5791 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5792 
5793 	/* bits 0-15 are the VM contexts0-15 */
5794 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5795 }
5796 
5797 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5798 {
5799 	int i;
5800 	uint32_t sh_mem_bases, sh_mem_config;
5801 
5802 	sh_mem_bases = 0x6000 | 0x6000 << 16;
5803 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5804 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5805 
5806 	mutex_lock(&rdev->srbm_mutex);
5807 	for (i = 8; i < 16; i++) {
5808 		cik_srbm_select(rdev, 0, 0, 0, i);
5809 		/* CP and shaders */
5810 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5811 		WREG32(SH_MEM_APE1_BASE, 1);
5812 		WREG32(SH_MEM_APE1_LIMIT, 0);
5813 		WREG32(SH_MEM_BASES, sh_mem_bases);
5814 	}
5815 	cik_srbm_select(rdev, 0, 0, 0, 0);
5816 	mutex_unlock(&rdev->srbm_mutex);
5817 }
5818 
5819 /**
5820  * cik_pcie_gart_enable - gart enable
5821  *
5822  * @rdev: radeon_device pointer
5823  *
5824  * This sets up the TLBs, programs the page tables for VMID0,
5825  * sets up the hw for VMIDs 1-15 which are allocated on
5826  * demand, and sets up the global locations for the LDS, GDS,
5827  * and GPUVM for FSA64 clients (CIK).
5828  * Returns 0 for success, errors for failure.
5829  */
5830 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5831 {
5832 	int r, i;
5833 
5834 	if (rdev->gart.robj == NULL) {
5835 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5836 		return -EINVAL;
5837 	}
5838 	r = radeon_gart_table_vram_pin(rdev);
5839 	if (r)
5840 		return r;
5841 	/* Setup TLB control */
5842 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5843 	       (0xA << 7) |
5844 	       ENABLE_L1_TLB |
5845 	       ENABLE_L1_FRAGMENT_PROCESSING |
5846 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5847 	       ENABLE_ADVANCED_DRIVER_MODEL |
5848 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5849 	/* Setup L2 cache */
5850 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5851 	       ENABLE_L2_FRAGMENT_PROCESSING |
5852 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5853 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5854 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5855 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5856 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5857 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5858 	       BANK_SELECT(4) |
5859 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5860 	/* setup context0 */
5861 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5862 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5863 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5864 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5865 			(u32)(rdev->dummy_page.addr >> 12));
5866 	WREG32(VM_CONTEXT0_CNTL2, 0);
5867 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5868 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5869 
5870 	WREG32(0x15D4, 0);
5871 	WREG32(0x15D8, 0);
5872 	WREG32(0x15DC, 0);
5873 
5874 	/* restore context1-15 */
5875 	/* set vm size, must be a multiple of 4 */
5876 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5877 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5878 	for (i = 1; i < 16; i++) {
5879 		if (i < 8)
5880 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5881 			       rdev->vm_manager.saved_table_addr[i]);
5882 		else
5883 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5884 			       rdev->vm_manager.saved_table_addr[i]);
5885 	}
5886 
5887 	/* enable context1-15 */
5888 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5889 	       (u32)(rdev->dummy_page.addr >> 12));
5890 	WREG32(VM_CONTEXT1_CNTL2, 4);
5891 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5892 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5893 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5894 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5895 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5896 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5897 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5898 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5899 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5900 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5901 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5902 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5903 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5904 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5905 
5906 	if (rdev->family == CHIP_KAVERI) {
5907 		u32 tmp = RREG32(CHUB_CONTROL);
5908 		tmp &= ~BYPASS_VM;
5909 		WREG32(CHUB_CONTROL, tmp);
5910 	}
5911 
5912 	/* XXX SH_MEM regs */
5913 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5914 	mutex_lock(&rdev->srbm_mutex);
5915 	for (i = 0; i < 16; i++) {
5916 		cik_srbm_select(rdev, 0, 0, 0, i);
5917 		/* CP and shaders */
5918 		WREG32(SH_MEM_CONFIG, 0);
5919 		WREG32(SH_MEM_APE1_BASE, 1);
5920 		WREG32(SH_MEM_APE1_LIMIT, 0);
5921 		WREG32(SH_MEM_BASES, 0);
5922 		/* SDMA GFX */
5923 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5924 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5925 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5926 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5927 		/* XXX SDMA RLC - todo */
5928 	}
5929 	cik_srbm_select(rdev, 0, 0, 0, 0);
5930 	mutex_unlock(&rdev->srbm_mutex);
5931 
5932 	cik_pcie_init_compute_vmid(rdev);
5933 
5934 	cik_pcie_gart_tlb_flush(rdev);
5935 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5936 		 (unsigned)(rdev->mc.gtt_size >> 20),
5937 		 (unsigned long long)rdev->gart.table_addr);
5938 	rdev->gart.ready = true;
5939 	return 0;
5940 }
5941 
5942 /**
5943  * cik_pcie_gart_disable - gart disable
5944  *
5945  * @rdev: radeon_device pointer
5946  *
5947  * This disables all VM page table (CIK).
5948  */
5949 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5950 {
5951 	unsigned i;
5952 
5953 	for (i = 1; i < 16; ++i) {
5954 		uint32_t reg;
5955 		if (i < 8)
5956 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5957 		else
5958 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5959 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5960 	}
5961 
5962 	/* Disable all tables */
5963 	WREG32(VM_CONTEXT0_CNTL, 0);
5964 	WREG32(VM_CONTEXT1_CNTL, 0);
5965 	/* Setup TLB control */
5966 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5967 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5968 	/* Setup L2 cache */
5969 	WREG32(VM_L2_CNTL,
5970 	       ENABLE_L2_FRAGMENT_PROCESSING |
5971 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5972 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5973 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5974 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5975 	WREG32(VM_L2_CNTL2, 0);
5976 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5977 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5978 	radeon_gart_table_vram_unpin(rdev);
5979 }
5980 
5981 /**
5982  * cik_pcie_gart_fini - vm fini callback
5983  *
5984  * @rdev: radeon_device pointer
5985  *
5986  * Tears down the driver GART/VM setup (CIK).
5987  */
5988 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5989 {
5990 	cik_pcie_gart_disable(rdev);
5991 	radeon_gart_table_vram_free(rdev);
5992 	radeon_gart_fini(rdev);
5993 }
5994 
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* nothing to do: IB validation is handled in hardware */
	return 0;
}
6008 
6009 /*
6010  * vm
6011  * VMID 0 is the physical GPU addresses as used by the kernel.
6012  * VMIDs 1-15 are used for userspace clients and are handled
6013  * by the radeon vm/hsa code.
6014  */
6015 /**
6016  * cik_vm_init - cik vm init callback
6017  *
6018  * @rdev: radeon_device pointer
6019  *
6020  * Inits cik specific vm parameters (number of VMs, base of vram for
6021  * VMIDs 1-15) (CIK).
6022  * Returns 0 for success.
6023  */
6024 int cik_vm_init(struct radeon_device *rdev)
6025 {
6026 	/*
6027 	 * number of VMs
6028 	 * VMID 0 is reserved for System
6029 	 * radeon graphics/compute will use VMIDs 1-7
6030 	 * amdkfd will use VMIDs 8-15
6031 	 */
6032 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
6033 	/* base offset of vram pages */
6034 	if (rdev->flags & RADEON_IS_IGP) {
6035 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
6036 		tmp <<= 22;
6037 		rdev->vm_manager.vram_base_offset = tmp;
6038 	} else
6039 		rdev->vm_manager.vram_base_offset = 0;
6040 
6041 	return 0;
6042 }
6043 
6044 /**
6045  * cik_vm_fini - cik vm fini callback
6046  *
6047  * @rdev: radeon_device pointer
6048  *
6049  * Tear down any asic specific VM setup (CIK).
6050  */
6051 void cik_vm_fini(struct radeon_device *rdev)
6052 {
6053 }
6054 
6055 /**
6056  * cik_vm_decode_fault - print human readable fault info
6057  *
6058  * @rdev: radeon_device pointer
6059  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
6060  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
6061  *
6062  * Print human readable fault information (CIK).
6063  */
6064 static void cik_vm_decode_fault(struct radeon_device *rdev,
6065 				u32 status, u32 addr, u32 mc_client)
6066 {
6067 	u32 mc_id;
6068 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6069 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6070 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6071 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6072 
6073 	if (rdev->family == CHIP_HAWAII)
6074 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6075 	else
6076 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6077 
6078 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6079 	       protections, vmid, addr,
6080 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6081 	       block, mc_client, mc_id);
6082 }
6083 
6084 /**
6085  * cik_vm_flush - cik vm flush using the CP
6086  *
6087  * @rdev: radeon_device pointer
6088  *
6089  * Update the page table base and flush the VM TLB
6090  * using the CP (CIK).
6091  */
6092 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6093 		  unsigned vm_id, uint64_t pd_addr)
6094 {
6095 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6096 
6097 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6098 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6099 				 WRITE_DATA_DST_SEL(0)));
6100 	if (vm_id < 8) {
6101 		radeon_ring_write(ring,
6102 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6103 	} else {
6104 		radeon_ring_write(ring,
6105 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6106 	}
6107 	radeon_ring_write(ring, 0);
6108 	radeon_ring_write(ring, pd_addr >> 12);
6109 
6110 	/* update SH_MEM_* regs */
6111 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6112 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6113 				 WRITE_DATA_DST_SEL(0)));
6114 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6115 	radeon_ring_write(ring, 0);
6116 	radeon_ring_write(ring, VMID(vm_id));
6117 
6118 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6119 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6120 				 WRITE_DATA_DST_SEL(0)));
6121 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6122 	radeon_ring_write(ring, 0);
6123 
6124 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6125 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6126 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6127 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6128 
6129 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6130 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6131 				 WRITE_DATA_DST_SEL(0)));
6132 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6133 	radeon_ring_write(ring, 0);
6134 	radeon_ring_write(ring, VMID(0));
6135 
6136 	/* HDP flush */
6137 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6138 
6139 	/* bits 0-15 are the VM contexts0-15 */
6140 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6141 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6142 				 WRITE_DATA_DST_SEL(0)));
6143 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6144 	radeon_ring_write(ring, 0);
6145 	radeon_ring_write(ring, 1 << vm_id);
6146 
6147 	/* wait for the invalidate to complete */
6148 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6149 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6150 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6151 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6152 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6153 	radeon_ring_write(ring, 0);
6154 	radeon_ring_write(ring, 0); /* ref */
6155 	radeon_ring_write(ring, 0); /* mask */
6156 	radeon_ring_write(ring, 0x20); /* poll interval */
6157 
6158 	/* compute doesn't have PFP */
6159 	if (usepfp) {
6160 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6161 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6162 		radeon_ring_write(ring, 0x0);
6163 	}
6164 }
6165 
6166 /*
6167  * RLC
6168  * The RLC is a multi-purpose microengine that handles a
6169  * variety of functions, the most important of which is
6170  * the interrupt controller.
6171  */
6172 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6173 					  bool enable)
6174 {
6175 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6176 
6177 	if (enable)
6178 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6179 	else
6180 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6181 	WREG32(CP_INT_CNTL_RING0, tmp);
6182 }
6183 
6184 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6185 {
6186 	u32 tmp;
6187 
6188 	tmp = RREG32(RLC_LB_CNTL);
6189 	if (enable)
6190 		tmp |= LOAD_BALANCE_ENABLE;
6191 	else
6192 		tmp &= ~LOAD_BALANCE_ENABLE;
6193 	WREG32(RLC_LB_CNTL, tmp);
6194 }
6195 
/* Poll until the RLC serdes masters report idle (bounded by usec_timeout). */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* wait for the CU master of every SE/SH combination to go idle */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast to all SEs/SHs */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* then wait for the non-CU masters as well */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
6222 
6223 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6224 {
6225 	u32 tmp;
6226 
6227 	tmp = RREG32(RLC_CNTL);
6228 	if (tmp != rlc)
6229 		WREG32(RLC_CNTL, rlc);
6230 }
6231 
6232 static u32 cik_halt_rlc(struct radeon_device *rdev)
6233 {
6234 	u32 data, orig;
6235 
6236 	orig = data = RREG32(RLC_CNTL);
6237 
6238 	if (data & RLC_ENABLE) {
6239 		u32 i;
6240 
6241 		data &= ~RLC_ENABLE;
6242 		WREG32(RLC_CNTL, data);
6243 
6244 		for (i = 0; i < rdev->usec_timeout; i++) {
6245 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6246 				break;
6247 			udelay(1);
6248 		}
6249 
6250 		cik_wait_for_rlc_serdes(rdev);
6251 	}
6252 
6253 	return orig;
6254 }
6255 
/* Ask the RLC to enter safe mode and wait for the handshake to finish. */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* post the enter-safe-mode request to the RLC */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait for GFX power and clock status bits to be set */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to ack the request (REQ bit cleared) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
6276 
6277 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6278 {
6279 	u32 tmp;
6280 
6281 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6282 	WREG32(RLC_GPR_REG2, tmp);
6283 }
6284 
6285 /**
6286  * cik_rlc_stop - stop the RLC ME
6287  *
6288  * @rdev: radeon_device pointer
6289  *
6290  * Halt the RLC ME (MicroEngine) (CIK).
6291  */
6292 static void cik_rlc_stop(struct radeon_device *rdev)
6293 {
6294 	WREG32(RLC_CNTL, 0);
6295 
6296 	cik_enable_gui_idle_interrupt(rdev, false);
6297 
6298 	cik_wait_for_rlc_serdes(rdev);
6299 }
6300 
6301 /**
6302  * cik_rlc_start - start the RLC ME
6303  *
6304  * @rdev: radeon_device pointer
6305  *
6306  * Unhalt the RLC ME (MicroEngine) (CIK).
6307  */
6308 static void cik_rlc_start(struct radeon_device *rdev)
6309 {
6310 	WREG32(RLC_CNTL, RLC_ENABLE);
6311 
6312 	cik_enable_gui_idle_interrupt(rdev, true);
6313 
6314 	udelay(50);
6315 }
6316 
6317 /**
6318  * cik_rlc_resume - setup the RLC hw
6319  *
6320  * @rdev: radeon_device pointer
6321  *
6322  * Initialize the RLC registers, load the ucode,
6323  * and start the RLC (CIK).
6324  * Returns 0 for success, -EINVAL if the ucode is not available.
6325  */
6326 static int cik_rlc_resume(struct radeon_device *rdev)
6327 {
6328 	u32 i, size, tmp;
6329 
6330 	if (!rdev->rlc_fw)
6331 		return -EINVAL;
6332 
6333 	cik_rlc_stop(rdev);
6334 
6335 	/* disable CG */
6336 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6337 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6338 
6339 	si_rlc_reset(rdev);
6340 
6341 	cik_init_pg(rdev);
6342 
6343 	cik_init_cg(rdev);
6344 
6345 	WREG32(RLC_LB_CNTR_INIT, 0);
6346 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6347 
6348 	mutex_lock(&rdev->grbm_idx_mutex);
6349 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6350 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6351 	WREG32(RLC_LB_PARAMS, 0x00600408);
6352 	WREG32(RLC_LB_CNTL, 0x80000004);
6353 	mutex_unlock(&rdev->grbm_idx_mutex);
6354 
6355 	WREG32(RLC_MC_CNTL, 0);
6356 	WREG32(RLC_UCODE_CNTL, 0);
6357 
6358 	if (rdev->new_fw) {
6359 		const struct rlc_firmware_header_v1_0 *hdr =
6360 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6361 		const __le32 *fw_data = (const __le32 *)
6362 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6363 
6364 		radeon_ucode_print_rlc_hdr(&hdr->header);
6365 
6366 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6367 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6368 		for (i = 0; i < size; i++)
6369 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6370 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6371 	} else {
6372 		const __be32 *fw_data;
6373 
6374 		switch (rdev->family) {
6375 		case CHIP_BONAIRE:
6376 		case CHIP_HAWAII:
6377 		default:
6378 			size = BONAIRE_RLC_UCODE_SIZE;
6379 			break;
6380 		case CHIP_KAVERI:
6381 			size = KV_RLC_UCODE_SIZE;
6382 			break;
6383 		case CHIP_KABINI:
6384 			size = KB_RLC_UCODE_SIZE;
6385 			break;
6386 		case CHIP_MULLINS:
6387 			size = ML_RLC_UCODE_SIZE;
6388 			break;
6389 		}
6390 
6391 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6392 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6393 		for (i = 0; i < size; i++)
6394 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6395 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6396 	}
6397 
6398 	/* XXX - find out what chips support lbpw */
6399 	cik_enable_lbpw(rdev, false);
6400 
6401 	if (rdev->family == CHIP_BONAIRE)
6402 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6403 
6404 	cik_rlc_start(rdev);
6405 
6406 	return 0;
6407 }
6408 
/* Enable/disable coarse grain clock gating (CGCG/CGLS) for gfx. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* program the serdes with the RLC halted, then restore it */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* read back CB_CGTT_SCLK_CTRL a few times before disabling */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6446 
/* Enable/disable medium grain clock gating (MGCG) and the related
 * mem light sleep / CGTS features for gfx. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* enable CP mem light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* program the serdes with the RLC halted, then restore it */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC mem light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* turn off CP mem light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* program the serdes with the RLC halted, then restore it */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6530 
/* MC registers carrying the MC_LS_ENABLE / MC_CG_ENABLE gating bits,
 * toggled as a group by cik_enable_mc_ls() and cik_enable_mc_mgcg() */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6543 
6544 static void cik_enable_mc_ls(struct radeon_device *rdev,
6545 			     bool enable)
6546 {
6547 	int i;
6548 	u32 orig, data;
6549 
6550 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6551 		orig = data = RREG32(mc_cg_registers[i]);
6552 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6553 			data |= MC_LS_ENABLE;
6554 		else
6555 			data &= ~MC_LS_ENABLE;
6556 		if (data != orig)
6557 			WREG32(mc_cg_registers[i], data);
6558 	}
6559 }
6560 
6561 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6562 			       bool enable)
6563 {
6564 	int i;
6565 	u32 orig, data;
6566 
6567 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6568 		orig = data = RREG32(mc_cg_registers[i]);
6569 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6570 			data |= MC_CG_ENABLE;
6571 		else
6572 			data &= ~MC_CG_ENABLE;
6573 		if (data != orig)
6574 			WREG32(mc_cg_registers[i], data);
6575 	}
6576 }
6577 
6578 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6579 				 bool enable)
6580 {
6581 	u32 orig, data;
6582 
6583 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6584 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6585 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6586 	} else {
6587 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6588 		data |= 0xff000000;
6589 		if (data != orig)
6590 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6591 
6592 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6593 		data |= 0xff000000;
6594 		if (data != orig)
6595 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6596 	}
6597 }
6598 
6599 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6600 				 bool enable)
6601 {
6602 	u32 orig, data;
6603 
6604 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6605 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6606 		data |= 0x100;
6607 		if (orig != data)
6608 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6609 
6610 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6611 		data |= 0x100;
6612 		if (orig != data)
6613 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6614 	} else {
6615 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6616 		data &= ~0x100;
6617 		if (orig != data)
6618 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6619 
6620 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6621 		data &= ~0x100;
6622 		if (orig != data)
6623 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6624 	}
6625 }
6626 
/* Enable/disable medium grain clock gating for the UVD block. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value just read is discarded and the
		 * register forced to 0xfff; looks odd but verify against
		 * the hw docs before changing */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6652 
6653 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6654 			       bool enable)
6655 {
6656 	u32 orig, data;
6657 
6658 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6659 
6660 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6661 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6662 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6663 	else
6664 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6665 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6666 
6667 	if (orig != data)
6668 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6669 }
6670 
6671 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6672 				bool enable)
6673 {
6674 	u32 orig, data;
6675 
6676 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6677 
6678 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6679 		data &= ~CLOCK_GATING_DIS;
6680 	else
6681 		data |= CLOCK_GATING_DIS;
6682 
6683 	if (orig != data)
6684 		WREG32(HDP_HOST_PATH_CNTL, data);
6685 }
6686 
6687 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6688 			      bool enable)
6689 {
6690 	u32 orig, data;
6691 
6692 	orig = data = RREG32(HDP_MEM_POWER_LS);
6693 
6694 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6695 		data |= HDP_LS_ENABLE;
6696 	else
6697 		data &= ~HDP_LS_ENABLE;
6698 
6699 	if (orig != data)
6700 		WREG32(HDP_MEM_POWER_LS, data);
6701 }
6702 
6703 void cik_update_cg(struct radeon_device *rdev,
6704 		   u32 block, bool enable)
6705 {
6706 
6707 	if (block & RADEON_CG_BLOCK_GFX) {
6708 		cik_enable_gui_idle_interrupt(rdev, false);
6709 		/* order matters! */
6710 		if (enable) {
6711 			cik_enable_mgcg(rdev, true);
6712 			cik_enable_cgcg(rdev, true);
6713 		} else {
6714 			cik_enable_cgcg(rdev, false);
6715 			cik_enable_mgcg(rdev, false);
6716 		}
6717 		cik_enable_gui_idle_interrupt(rdev, true);
6718 	}
6719 
6720 	if (block & RADEON_CG_BLOCK_MC) {
6721 		if (!(rdev->flags & RADEON_IS_IGP)) {
6722 			cik_enable_mc_mgcg(rdev, enable);
6723 			cik_enable_mc_ls(rdev, enable);
6724 		}
6725 	}
6726 
6727 	if (block & RADEON_CG_BLOCK_SDMA) {
6728 		cik_enable_sdma_mgcg(rdev, enable);
6729 		cik_enable_sdma_mgls(rdev, enable);
6730 	}
6731 
6732 	if (block & RADEON_CG_BLOCK_BIF) {
6733 		cik_enable_bif_mgls(rdev, enable);
6734 	}
6735 
6736 	if (block & RADEON_CG_BLOCK_UVD) {
6737 		if (rdev->has_uvd)
6738 			cik_enable_uvd_mgcg(rdev, enable);
6739 	}
6740 
6741 	if (block & RADEON_CG_BLOCK_HDP) {
6742 		cik_enable_hdp_mgcg(rdev, enable);
6743 		cik_enable_hdp_ls(rdev, enable);
6744 	}
6745 
6746 	if (block & RADEON_CG_BLOCK_VCE) {
6747 		vce_v2_0_enable_mgcg(rdev, enable);
6748 	}
6749 }
6750 
6751 static void cik_init_cg(struct radeon_device *rdev)
6752 {
6753 
6754 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6755 
6756 	if (rdev->has_uvd)
6757 		si_init_uvd_internal_cg(rdev);
6758 
6759 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6760 			     RADEON_CG_BLOCK_SDMA |
6761 			     RADEON_CG_BLOCK_BIF |
6762 			     RADEON_CG_BLOCK_UVD |
6763 			     RADEON_CG_BLOCK_HDP), true);
6764 }
6765 
6766 static void cik_fini_cg(struct radeon_device *rdev)
6767 {
6768 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6769 			     RADEON_CG_BLOCK_SDMA |
6770 			     RADEON_CG_BLOCK_BIF |
6771 			     RADEON_CG_BLOCK_UVD |
6772 			     RADEON_CG_BLOCK_HDP), false);
6773 
6774 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6775 }
6776 
6777 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6778 					  bool enable)
6779 {
6780 	u32 data, orig;
6781 
6782 	orig = data = RREG32(RLC_PG_CNTL);
6783 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6784 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6785 	else
6786 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6787 	if (orig != data)
6788 		WREG32(RLC_PG_CNTL, data);
6789 }
6790 
6791 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6792 					  bool enable)
6793 {
6794 	u32 data, orig;
6795 
6796 	orig = data = RREG32(RLC_PG_CNTL);
6797 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6798 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6799 	else
6800 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6801 	if (orig != data)
6802 		WREG32(RLC_PG_CNTL, data);
6803 }
6804 
6805 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6806 {
6807 	u32 data, orig;
6808 
6809 	orig = data = RREG32(RLC_PG_CNTL);
6810 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6811 		data &= ~DISABLE_CP_PG;
6812 	else
6813 		data |= DISABLE_CP_PG;
6814 	if (orig != data)
6815 		WREG32(RLC_PG_CNTL, data);
6816 }
6817 
6818 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6819 {
6820 	u32 data, orig;
6821 
6822 	orig = data = RREG32(RLC_PG_CNTL);
6823 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6824 		data &= ~DISABLE_GDS_PG;
6825 	else
6826 		data |= DISABLE_GDS_PG;
6827 	if (orig != data)
6828 		WREG32(RLC_PG_CNTL, data);
6829 }
6830 
/* Geometry of the jump tables embedded in the legacy (non new_fw) CP
 * firmware images, consumed by cik_init_cp_pg_table() (units: dwords).
 */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096
6834 
6835 void cik_init_cp_pg_table(struct radeon_device *rdev)
6836 {
6837 	volatile u32 *dst_ptr;
6838 	int me, i, max_me = 4;
6839 	u32 bo_offset = 0;
6840 	u32 table_offset, table_size;
6841 
6842 	if (rdev->family == CHIP_KAVERI)
6843 		max_me = 5;
6844 
6845 	if (rdev->rlc.cp_table_ptr == NULL)
6846 		return;
6847 
6848 	/* write the cp table buffer */
6849 	dst_ptr = rdev->rlc.cp_table_ptr;
6850 	for (me = 0; me < max_me; me++) {
6851 		if (rdev->new_fw) {
6852 			const __le32 *fw_data;
6853 			const struct gfx_firmware_header_v1_0 *hdr;
6854 
6855 			if (me == 0) {
6856 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6857 				fw_data = (const __le32 *)
6858 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6859 				table_offset = le32_to_cpu(hdr->jt_offset);
6860 				table_size = le32_to_cpu(hdr->jt_size);
6861 			} else if (me == 1) {
6862 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6863 				fw_data = (const __le32 *)
6864 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6865 				table_offset = le32_to_cpu(hdr->jt_offset);
6866 				table_size = le32_to_cpu(hdr->jt_size);
6867 			} else if (me == 2) {
6868 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6869 				fw_data = (const __le32 *)
6870 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6871 				table_offset = le32_to_cpu(hdr->jt_offset);
6872 				table_size = le32_to_cpu(hdr->jt_size);
6873 			} else if (me == 3) {
6874 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6875 				fw_data = (const __le32 *)
6876 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6877 				table_offset = le32_to_cpu(hdr->jt_offset);
6878 				table_size = le32_to_cpu(hdr->jt_size);
6879 			} else {
6880 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6881 				fw_data = (const __le32 *)
6882 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6883 				table_offset = le32_to_cpu(hdr->jt_offset);
6884 				table_size = le32_to_cpu(hdr->jt_size);
6885 			}
6886 
6887 			for (i = 0; i < table_size; i ++) {
6888 				dst_ptr[bo_offset + i] =
6889 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6890 			}
6891 			bo_offset += table_size;
6892 		} else {
6893 			const __be32 *fw_data;
6894 			table_size = CP_ME_TABLE_SIZE;
6895 
6896 			if (me == 0) {
6897 				fw_data = (const __be32 *)rdev->ce_fw->data;
6898 				table_offset = CP_ME_TABLE_OFFSET;
6899 			} else if (me == 1) {
6900 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6901 				table_offset = CP_ME_TABLE_OFFSET;
6902 			} else if (me == 2) {
6903 				fw_data = (const __be32 *)rdev->me_fw->data;
6904 				table_offset = CP_ME_TABLE_OFFSET;
6905 			} else {
6906 				fw_data = (const __be32 *)rdev->mec_fw->data;
6907 				table_offset = CP_MEC_TABLE_OFFSET;
6908 			}
6909 
6910 			for (i = 0; i < table_size; i ++) {
6911 				dst_ptr[bo_offset + i] =
6912 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6913 			}
6914 			bo_offset += table_size;
6915 		}
6916 	}
6917 }
6918 
6919 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6920 				bool enable)
6921 {
6922 	u32 data, orig;
6923 
6924 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6925 		orig = data = RREG32(RLC_PG_CNTL);
6926 		data |= GFX_PG_ENABLE;
6927 		if (orig != data)
6928 			WREG32(RLC_PG_CNTL, data);
6929 
6930 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6931 		data |= AUTO_PG_EN;
6932 		if (orig != data)
6933 			WREG32(RLC_AUTO_PG_CTRL, data);
6934 	} else {
6935 		orig = data = RREG32(RLC_PG_CNTL);
6936 		data &= ~GFX_PG_ENABLE;
6937 		if (orig != data)
6938 			WREG32(RLC_PG_CNTL, data);
6939 
6940 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6941 		data &= ~AUTO_PG_EN;
6942 		if (orig != data)
6943 			WREG32(RLC_AUTO_PG_CTRL, data);
6944 
6945 		data = RREG32(DB_RENDER_CONTROL);
6946 	}
6947 }
6948 
6949 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6950 {
6951 	u32 mask = 0, tmp, tmp1;
6952 	int i;
6953 
6954 	mutex_lock(&rdev->grbm_idx_mutex);
6955 	cik_select_se_sh(rdev, se, sh);
6956 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6957 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6958 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6959 	mutex_unlock(&rdev->grbm_idx_mutex);
6960 
6961 	tmp &= 0xffff0000;
6962 
6963 	tmp |= tmp1;
6964 	tmp >>= 16;
6965 
6966 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6967 		mask <<= 1;
6968 		mask |= 1;
6969 	}
6970 
6971 	return (~tmp) & mask;
6972 }
6973 
6974 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6975 {
6976 	u32 i, j, k, active_cu_number = 0;
6977 	u32 mask, counter, cu_bitmap;
6978 	u32 tmp = 0;
6979 
6980 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6981 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6982 			mask = 1;
6983 			cu_bitmap = 0;
6984 			counter = 0;
6985 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6986 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6987 					if (counter < 2)
6988 						cu_bitmap |= mask;
6989 					counter ++;
6990 				}
6991 				mask <<= 1;
6992 			}
6993 
6994 			active_cu_number += counter;
6995 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6996 		}
6997 	}
6998 
6999 	WREG32(RLC_PG_AO_CU_MASK, tmp);
7000 
7001 	tmp = RREG32(RLC_MAX_PG_CU);
7002 	tmp &= ~MAX_PU_CU_MASK;
7003 	tmp |= MAX_PU_CU(active_cu_number);
7004 	WREG32(RLC_MAX_PG_CU, tmp);
7005 }
7006 
7007 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
7008 				       bool enable)
7009 {
7010 	u32 data, orig;
7011 
7012 	orig = data = RREG32(RLC_PG_CNTL);
7013 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
7014 		data |= STATIC_PER_CU_PG_ENABLE;
7015 	else
7016 		data &= ~STATIC_PER_CU_PG_ENABLE;
7017 	if (orig != data)
7018 		WREG32(RLC_PG_CNTL, data);
7019 }
7020 
7021 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
7022 					bool enable)
7023 {
7024 	u32 data, orig;
7025 
7026 	orig = data = RREG32(RLC_PG_CNTL);
7027 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
7028 		data |= DYN_PER_CU_PG_ENABLE;
7029 	else
7030 		data &= ~DYN_PER_CU_PG_ENABLE;
7031 	if (orig != data)
7032 		WREG32(RLC_PG_CNTL, data);
7033 }
7034 
/* Dword offsets into the RLC GPM scratch area programmed by
 * cik_init_gfx_cgpg().
 */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
7037 
/**
 * cik_init_gfx_cgpg - set up the RLC for gfx powergating
 *
 * @rdev: radeon_device pointer
 *
 * Publish the clear state descriptor and the save/restore register
 * list through the RLC GPM scratch area, point the RLC at the
 * save/restore and cp table buffers, and program the powergating
 * delay and idle poll parameters (CIK).
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* write the clear state address (hi/lo) and size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear state data: zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	/* have the RLC fetch save/restore state from the buffers */
	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
7086 
/* Enable/disable all gfx powergating flavors (core, static per-CU,
 * dynamic per-CU) in one call; order is preserved from the original
 * bring-up sequence (CIK).
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
7093 
7094 u32 cik_get_csb_size(struct radeon_device *rdev)
7095 {
7096 	u32 count = 0;
7097 	const struct cs_section_def *sect = NULL;
7098 	const struct cs_extent_def *ext = NULL;
7099 
7100 	if (rdev->rlc.cs_data == NULL)
7101 		return 0;
7102 
7103 	/* begin clear state */
7104 	count += 2;
7105 	/* context control state */
7106 	count += 3;
7107 
7108 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7109 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7110 			if (sect->id == SECT_CONTEXT)
7111 				count += 2 + ext->reg_count;
7112 			else
7113 				return 0;
7114 		}
7115 	}
7116 	/* pa_sc_raster_config/pa_sc_raster_config1 */
7117 	count += 4;
7118 	/* end clear state */
7119 	count += 2;
7120 	/* clear state */
7121 	count += 2;
7122 
7123 	return count;
7124 }
7125 
7126 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7127 {
7128 	u32 count = 0, i;
7129 	const struct cs_section_def *sect = NULL;
7130 	const struct cs_extent_def *ext = NULL;
7131 
7132 	if (rdev->rlc.cs_data == NULL)
7133 		return;
7134 	if (buffer == NULL)
7135 		return;
7136 
7137 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7138 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7139 
7140 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7141 	buffer[count++] = cpu_to_le32(0x80000000);
7142 	buffer[count++] = cpu_to_le32(0x80000000);
7143 
7144 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7145 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7146 			if (sect->id == SECT_CONTEXT) {
7147 				buffer[count++] =
7148 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7149 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7150 				for (i = 0; i < ext->reg_count; i++)
7151 					buffer[count++] = cpu_to_le32(ext->extent[i]);
7152 			} else {
7153 				return;
7154 			}
7155 		}
7156 	}
7157 
7158 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7159 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7160 	switch (rdev->family) {
7161 	case CHIP_BONAIRE:
7162 		buffer[count++] = cpu_to_le32(0x16000012);
7163 		buffer[count++] = cpu_to_le32(0x00000000);
7164 		break;
7165 	case CHIP_KAVERI:
7166 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7167 		buffer[count++] = cpu_to_le32(0x00000000);
7168 		break;
7169 	case CHIP_KABINI:
7170 	case CHIP_MULLINS:
7171 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7172 		buffer[count++] = cpu_to_le32(0x00000000);
7173 		break;
7174 	case CHIP_HAWAII:
7175 		buffer[count++] = cpu_to_le32(0x3a00161a);
7176 		buffer[count++] = cpu_to_le32(0x0000002e);
7177 		break;
7178 	default:
7179 		buffer[count++] = cpu_to_le32(0x00000000);
7180 		buffer[count++] = cpu_to_le32(0x00000000);
7181 		break;
7182 	}
7183 
7184 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7185 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7186 
7187 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7188 	buffer[count++] = cpu_to_le32(0);
7189 }
7190 
7191 static void cik_init_pg(struct radeon_device *rdev)
7192 {
7193 	if (rdev->pg_flags) {
7194 		cik_enable_sck_slowdown_on_pu(rdev, true);
7195 		cik_enable_sck_slowdown_on_pd(rdev, true);
7196 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7197 			cik_init_gfx_cgpg(rdev);
7198 			cik_enable_cp_pg(rdev, true);
7199 			cik_enable_gds_pg(rdev, true);
7200 		}
7201 		cik_init_ao_cu_mask(rdev);
7202 		cik_update_gfx_pg(rdev, true);
7203 	}
7204 }
7205 
7206 static void cik_fini_pg(struct radeon_device *rdev)
7207 {
7208 	if (rdev->pg_flags) {
7209 		cik_update_gfx_pg(rdev, false);
7210 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7211 			cik_enable_cp_pg(rdev, false);
7212 			cik_enable_gds_pg(rdev, false);
7213 		}
7214 	}
7215 }
7216 
7217 /*
7218  * Interrupts
7219  * Starting with r6xx, interrupts are handled via a ring buffer.
7220  * Ring buffers are areas of GPU accessible memory that the GPU
7221  * writes interrupt vectors into and the host reads vectors out of.
7222  * There is a rptr (read pointer) that determines where the
7223  * host is currently reading, and a wptr (write pointer)
7224  * which determines where the GPU has written.  When the
7225  * pointers are equal, the ring is idle.  When the GPU
7226  * writes vectors to the ring buffer, it increments the
7227  * wptr.  When there is an interrupt, the host then starts
7228  * fetching commands and processing them until the pointers are
7229  * equal again at which point it updates the rptr.
7230  */
7231 
7232 /**
7233  * cik_enable_interrupts - Enable the interrupt ring buffer
7234  *
7235  * @rdev: radeon_device pointer
7236  *
7237  * Enable the interrupt ring buffer (CIK).
7238  */
7239 static void cik_enable_interrupts(struct radeon_device *rdev)
7240 {
7241 	u32 ih_cntl = RREG32(IH_CNTL);
7242 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7243 
7244 	ih_cntl |= ENABLE_INTR;
7245 	ih_rb_cntl |= IH_RB_ENABLE;
7246 	WREG32(IH_CNTL, ih_cntl);
7247 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7248 	rdev->ih.enabled = true;
7249 }
7250 
7251 /**
7252  * cik_disable_interrupts - Disable the interrupt ring buffer
7253  *
7254  * @rdev: radeon_device pointer
7255  *
7256  * Disable the interrupt ring buffer (CIK).
7257  */
7258 static void cik_disable_interrupts(struct radeon_device *rdev)
7259 {
7260 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7261 	u32 ih_cntl = RREG32(IH_CNTL);
7262 
7263 	ih_rb_cntl &= ~IH_RB_ENABLE;
7264 	ih_cntl &= ~ENABLE_INTR;
7265 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7266 	WREG32(IH_CNTL, ih_cntl);
7267 	/* set rptr, wptr to 0 */
7268 	WREG32(IH_RB_RPTR, 0);
7269 	WREG32(IH_RB_WPTR, 0);
7270 	rdev->ih.enabled = false;
7271 	rdev->ih.rptr = 0;
7272 }
7273 
7274 /**
7275  * cik_disable_interrupt_state - Disable all interrupt sources
7276  *
7277  * @rdev: radeon_device pointer
7278  *
7279  * Clear all interrupt enable bits used by the driver (CIK).
7280  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty enables */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: preserve only the polarity bit so the hotplug
	 * sense configuration survives the disable */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7350 
7351 /**
7352  * cik_irq_init - init and enable the interrupt ring
7353  *
7354  * @rdev: radeon_device pointer
7355  *
7356  * Allocate a ring buffer for the interrupt controller,
7357  * enable the RLC, disable interrupts, enable the IH
7358  * ring buffer and enable it (CIK).
 * Called at device load and resume.
7360  * Returns 0 for success, errors for failure.
7361  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* RLC failed to come up: release the IH ring again */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7432 
7433 /**
7434  * cik_irq_set - enable/disable interrupt sources
7435  *
7436  * @rdev: radeon_device pointer
7437  *
7438  * Enable interrupt sources on the GPU (vblanks, hpd,
7439  * etc.) (CIK).
7440  * Returns 0 for success, errors for failure.
7441  */
7442 int cik_irq_set(struct radeon_device *rdev)
7443 {
7444 	u32 cp_int_cntl;
7445 	u32 cp_m1p0;
7446 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7447 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7448 	u32 grbm_int_cntl = 0;
7449 	u32 dma_cntl, dma_cntl1;
7450 
7451 	if (!rdev->irq.installed) {
7452 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7453 		return -EINVAL;
7454 	}
7455 	/* don't enable anything if the ih is disabled */
7456 	if (!rdev->ih.enabled) {
7457 		cik_disable_interrupts(rdev);
7458 		/* force the active interrupt state to all disabled */
7459 		cik_disable_interrupt_state(rdev);
7460 		return 0;
7461 	}
7462 
7463 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7464 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7465 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7466 
7467 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7468 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7469 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7470 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7471 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7472 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7473 
7474 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7475 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7476 
7477 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7478 
7479 	/* enable CP interrupts on all rings */
7480 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7481 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7482 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7483 	}
7484 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7485 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7486 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7487 		if (ring->me == 1) {
7488 			switch (ring->pipe) {
7489 			case 0:
7490 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7491 				break;
7492 			default:
7493 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7494 				break;
7495 			}
7496 		} else {
7497 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7498 		}
7499 	}
7500 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7501 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7502 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7503 		if (ring->me == 1) {
7504 			switch (ring->pipe) {
7505 			case 0:
7506 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7507 				break;
7508 			default:
7509 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7510 				break;
7511 			}
7512 		} else {
7513 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7514 		}
7515 	}
7516 
7517 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7518 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7519 		dma_cntl |= TRAP_ENABLE;
7520 	}
7521 
7522 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7523 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7524 		dma_cntl1 |= TRAP_ENABLE;
7525 	}
7526 
7527 	if (rdev->irq.crtc_vblank_int[0] ||
7528 	    atomic_read(&rdev->irq.pflip[0])) {
7529 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7530 		crtc1 |= VBLANK_INTERRUPT_MASK;
7531 	}
7532 	if (rdev->irq.crtc_vblank_int[1] ||
7533 	    atomic_read(&rdev->irq.pflip[1])) {
7534 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7535 		crtc2 |= VBLANK_INTERRUPT_MASK;
7536 	}
7537 	if (rdev->irq.crtc_vblank_int[2] ||
7538 	    atomic_read(&rdev->irq.pflip[2])) {
7539 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7540 		crtc3 |= VBLANK_INTERRUPT_MASK;
7541 	}
7542 	if (rdev->irq.crtc_vblank_int[3] ||
7543 	    atomic_read(&rdev->irq.pflip[3])) {
7544 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7545 		crtc4 |= VBLANK_INTERRUPT_MASK;
7546 	}
7547 	if (rdev->irq.crtc_vblank_int[4] ||
7548 	    atomic_read(&rdev->irq.pflip[4])) {
7549 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7550 		crtc5 |= VBLANK_INTERRUPT_MASK;
7551 	}
7552 	if (rdev->irq.crtc_vblank_int[5] ||
7553 	    atomic_read(&rdev->irq.pflip[5])) {
7554 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7555 		crtc6 |= VBLANK_INTERRUPT_MASK;
7556 	}
7557 	if (rdev->irq.hpd[0]) {
7558 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7559 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7560 	}
7561 	if (rdev->irq.hpd[1]) {
7562 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7563 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7564 	}
7565 	if (rdev->irq.hpd[2]) {
7566 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7567 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7568 	}
7569 	if (rdev->irq.hpd[3]) {
7570 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7571 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7572 	}
7573 	if (rdev->irq.hpd[4]) {
7574 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7575 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7576 	}
7577 	if (rdev->irq.hpd[5]) {
7578 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7579 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7580 	}
7581 
7582 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7583 
7584 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7585 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7586 
7587 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7588 
7589 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7590 
7591 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7592 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7593 	if (rdev->num_crtc >= 4) {
7594 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7595 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7596 	}
7597 	if (rdev->num_crtc >= 6) {
7598 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7599 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7600 	}
7601 
7602 	if (rdev->num_crtc >= 2) {
7603 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7604 		       GRPH_PFLIP_INT_MASK);
7605 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7606 		       GRPH_PFLIP_INT_MASK);
7607 	}
7608 	if (rdev->num_crtc >= 4) {
7609 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7610 		       GRPH_PFLIP_INT_MASK);
7611 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7612 		       GRPH_PFLIP_INT_MASK);
7613 	}
7614 	if (rdev->num_crtc >= 6) {
7615 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7616 		       GRPH_PFLIP_INT_MASK);
7617 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7618 		       GRPH_PFLIP_INT_MASK);
7619 	}
7620 
7621 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7622 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7623 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7624 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7625 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7626 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7627 
7628 	/* posting read */
7629 	RREG32(SRBM_STATUS);
7630 
7631 	return 0;
7632 }
7633 
7634 /**
7635  * cik_irq_ack - ack interrupt sources
7636  *
7637  * @rdev: radeon_device pointer
7638  *
7639  * Ack interrupt sources on the GPU (vblanks, hpd,
7640  * etc.) (CIK).  Certain interrupts sources are sw
7641  * generated and do not require an explicit ack.
7642  */
7643 static inline void cik_irq_ack(struct radeon_device *rdev)
7644 {
7645 	u32 tmp;
7646 
7647 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7648 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7649 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7650 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7651 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7652 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7653 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7654 
7655 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7656 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7657 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7658 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7659 	if (rdev->num_crtc >= 4) {
7660 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7661 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7662 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7663 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7664 	}
7665 	if (rdev->num_crtc >= 6) {
7666 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7667 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7668 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7669 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7670 	}
7671 
7672 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7673 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7674 		       GRPH_PFLIP_INT_CLEAR);
7675 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7676 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7677 		       GRPH_PFLIP_INT_CLEAR);
7678 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7679 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7680 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7681 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7682 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7683 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7684 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7685 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7686 
7687 	if (rdev->num_crtc >= 4) {
7688 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7689 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7690 			       GRPH_PFLIP_INT_CLEAR);
7691 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7692 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7693 			       GRPH_PFLIP_INT_CLEAR);
7694 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7695 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7696 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7697 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7698 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7699 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7700 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7701 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7702 	}
7703 
7704 	if (rdev->num_crtc >= 6) {
7705 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7706 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7707 			       GRPH_PFLIP_INT_CLEAR);
7708 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7709 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7710 			       GRPH_PFLIP_INT_CLEAR);
7711 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7712 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7713 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7714 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7715 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7716 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7717 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7718 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7719 	}
7720 
7721 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7722 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7723 		tmp |= DC_HPDx_INT_ACK;
7724 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7725 	}
7726 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7727 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7728 		tmp |= DC_HPDx_INT_ACK;
7729 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7730 	}
7731 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7732 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7733 		tmp |= DC_HPDx_INT_ACK;
7734 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7735 	}
7736 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7737 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7738 		tmp |= DC_HPDx_INT_ACK;
7739 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7740 	}
7741 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7742 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7743 		tmp |= DC_HPDx_INT_ACK;
7744 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7745 	}
7746 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7747 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7748 		tmp |= DC_HPDx_INT_ACK;
7749 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7750 	}
7751 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7752 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7753 		tmp |= DC_HPDx_RX_INT_ACK;
7754 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7755 	}
7756 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7757 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7758 		tmp |= DC_HPDx_RX_INT_ACK;
7759 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7760 	}
7761 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7762 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7763 		tmp |= DC_HPDx_RX_INT_ACK;
7764 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7765 	}
7766 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7767 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7768 		tmp |= DC_HPDx_RX_INT_ACK;
7769 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7770 	}
7771 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7772 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7773 		tmp |= DC_HPDx_RX_INT_ACK;
7774 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7775 	}
7776 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7777 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7778 		tmp |= DC_HPDx_RX_INT_ACK;
7779 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7780 	}
7781 }
7782 
7783 /**
7784  * cik_irq_disable - disable interrupts
7785  *
7786  * @rdev: radeon_device pointer
7787  *
7788  * Disable interrupts on the hw (CIK).
7789  */
7790 static void cik_irq_disable(struct radeon_device *rdev)
7791 {
7792 	cik_disable_interrupts(rdev);
7793 	/* Wait and acknowledge irq */
7794 	mdelay(1);
7795 	cik_irq_ack(rdev);
7796 	cik_disable_interrupt_state(rdev);
7797 }
7798 
7799 /**
7800  * cik_irq_disable - disable interrupts for suspend
7801  *
7802  * @rdev: radeon_device pointer
7803  *
7804  * Disable interrupts and stop the RLC (CIK).
7805  * Used for suspend.
7806  */
7807 static void cik_irq_suspend(struct radeon_device *rdev)
7808 {
7809 	cik_irq_disable(rdev);
7810 	cik_rlc_stop(rdev);
7811 }
7812 
7813 /**
7814  * cik_irq_fini - tear down interrupt support
7815  *
7816  * @rdev: radeon_device pointer
7817  *
7818  * Disable interrupts on the hw and free the IH ring
7819  * buffer (CIK).
7820  * Used for driver unload.
7821  */
7822 static void cik_irq_fini(struct radeon_device *rdev)
7823 {
7824 	cik_irq_suspend(rdev);
7825 	r600_ih_ring_fini(rdev);
7826 }
7827 
7828 /**
7829  * cik_get_ih_wptr - get the IH ring buffer wptr
7830  *
7831  * @rdev: radeon_device pointer
7832  *
7833  * Get the IH ring buffer wptr from either the register
7834  * or the writeback memory buffer (CIK).  Also check for
7835  * ring buffer overflow and deal with it.
7836  * Used by cik_irq_process().
7837  * Returns the value of the wptr.
7838  */
7839 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7840 {
7841 	u32 wptr, tmp;
7842 
7843 	if (rdev->wb.enabled)
7844 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7845 	else
7846 		wptr = RREG32(IH_RB_WPTR);
7847 
7848 	if (wptr & RB_OVERFLOW) {
7849 		wptr &= ~RB_OVERFLOW;
7850 		/* When a ring buffer overflow happen start parsing interrupt
7851 		 * from the last not overwritten vector (wptr + 16). Hopefully
7852 		 * this should allow us to catchup.
7853 		 */
7854 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7855 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7856 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7857 		tmp = RREG32(IH_RB_CNTL);
7858 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7859 		WREG32(IH_RB_CNTL, tmp);
7860 	}
7861 	return (wptr & rdev->ih.ptr_mask);
7862 }
7863 
7864 /*        CIK IV Ring
7865  * Each IV ring entry is 128 bits:
7866  * [7:0]    - interrupt source id
7867  * [31:8]   - reserved
7868  * [59:32]  - interrupt source data
7869  * [63:60]  - reserved
7870  * [71:64]  - RINGID
7871  *            CP:
7872  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7873  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7874  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7875  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7876  *            PIPE_ID - ME0 0=3D
7877  *                    - ME1&2 compute dispatcher (4 pipes each)
7878  *            SDMA:
7879  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7880  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7881  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7882  * [79:72]  - VMID
7883  * [95:80]  - PASID
7884  * [127:96] - reserved
7885  */
7886 /**
7887  * cik_irq_process - interrupt handler
7888  *
7889  * @rdev: radeon_device pointer
7890  *
7891  * Interrupt hander (CIK).  Walk the IH ring,
7892  * ack interrupts and schedule work to handle
7893  * interrupt events.
7894  * Returns irq process return code.
7895  */
7896 irqreturn_t cik_irq_process(struct radeon_device *rdev)
7897 {
7898 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7899 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7900 	u32 wptr;
7901 	u32 rptr;
7902 	u32 src_id, src_data, ring_id;
7903 	u8 me_id, pipe_id, queue_id;
7904 	u32 ring_index;
7905 	bool queue_hotplug = false;
7906 	bool queue_dp = false;
7907 	bool queue_reset = false;
7908 	u32 addr, status, mc_client;
7909 	bool queue_thermal = false;
7910 
7911 	if (!rdev->ih.enabled || rdev->shutdown)
7912 		return IRQ_NONE;
7913 
7914 	wptr = cik_get_ih_wptr(rdev);
7915 
7916 restart_ih:
7917 	/* is somebody else already processing irqs? */
7918 	if (atomic_xchg(&rdev->ih.lock, 1))
7919 		return IRQ_NONE;
7920 
7921 	rptr = rdev->ih.rptr;
7922 	DRM_DEBUG_VBLANK("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7923 
7924 	/* Order reading of wptr vs. reading of IH ring data */
7925 	rmb();
7926 
7927 	/* display interrupts */
7928 	cik_irq_ack(rdev);
7929 
7930 	while (rptr != wptr) {
7931 		/* wptr/rptr are in bytes! */
7932 		ring_index = rptr / 4;
7933 
7934 #pragma GCC diagnostic push
7935 #pragma GCC diagnostic ignored "-Wcast-qual"
7936 		radeon_kfd_interrupt(rdev,
7937 				(const void *) &rdev->ih.ring[ring_index]);
7938 #pragma GCC diagnostic pop
7939 
7940 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7941 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7942 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7943 
7944 		switch (src_id) {
7945 		case 1: /* D1 vblank/vline */
7946 			switch (src_data) {
7947 			case 0: /* D1 vblank */
7948 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7949 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7950 
7951 				if (rdev->irq.crtc_vblank_int[0]) {
7952 					drm_handle_vblank(rdev->ddev, 0);
7953 					rdev->pm.vblank_sync = true;
7954 					wake_up(&rdev->irq.vblank_queue);
7955 				}
7956 				if (atomic_read(&rdev->irq.pflip[0]))
7957 					radeon_crtc_handle_vblank(rdev, 0);
7958 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7959 				DRM_DEBUG_VBLANK("IH: D1 vblank\n");
7960 
7961 				break;
7962 			case 1: /* D1 vline */
7963 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7964 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7965 
7966 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7967 				DRM_DEBUG_VBLANK("IH: D1 vline\n");
7968 
7969 				break;
7970 			default:
7971 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7972 				break;
7973 			}
7974 			break;
7975 		case 2: /* D2 vblank/vline */
7976 			switch (src_data) {
7977 			case 0: /* D2 vblank */
7978 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7979 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7980 
7981 				if (rdev->irq.crtc_vblank_int[1]) {
7982 					drm_handle_vblank(rdev->ddev, 1);
7983 					rdev->pm.vblank_sync = true;
7984 					wake_up(&rdev->irq.vblank_queue);
7985 				}
7986 				if (atomic_read(&rdev->irq.pflip[1]))
7987 					radeon_crtc_handle_vblank(rdev, 1);
7988 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7989 				DRM_DEBUG_VBLANK("IH: D2 vblank\n");
7990 
7991 				break;
7992 			case 1: /* D2 vline */
7993 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7994 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7995 
7996 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7997 				DRM_DEBUG_VBLANK("IH: D2 vline\n");
7998 
7999 				break;
8000 			default:
8001 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8002 				break;
8003 			}
8004 			break;
8005 		case 3: /* D3 vblank/vline */
8006 			switch (src_data) {
8007 			case 0: /* D3 vblank */
8008 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
8009 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8010 
8011 				if (rdev->irq.crtc_vblank_int[2]) {
8012 					drm_handle_vblank(rdev->ddev, 2);
8013 					rdev->pm.vblank_sync = true;
8014 					wake_up(&rdev->irq.vblank_queue);
8015 				}
8016 				if (atomic_read(&rdev->irq.pflip[2]))
8017 					radeon_crtc_handle_vblank(rdev, 2);
8018 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
8019 				DRM_DEBUG_VBLANK("IH: D3 vblank\n");
8020 
8021 				break;
8022 			case 1: /* D3 vline */
8023 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
8024 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8025 
8026 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
8027 				DRM_DEBUG_VBLANK("IH: D3 vline\n");
8028 
8029 				break;
8030 			default:
8031 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8032 				break;
8033 			}
8034 			break;
8035 		case 4: /* D4 vblank/vline */
8036 			switch (src_data) {
8037 			case 0: /* D4 vblank */
8038 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
8039 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8040 
8041 				if (rdev->irq.crtc_vblank_int[3]) {
8042 					drm_handle_vblank(rdev->ddev, 3);
8043 					rdev->pm.vblank_sync = true;
8044 					wake_up(&rdev->irq.vblank_queue);
8045 				}
8046 				if (atomic_read(&rdev->irq.pflip[3]))
8047 					radeon_crtc_handle_vblank(rdev, 3);
8048 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
8049 				DRM_DEBUG_VBLANK("IH: D4 vblank\n");
8050 
8051 				break;
8052 			case 1: /* D4 vline */
8053 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
8054 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8055 
8056 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8057 				DRM_DEBUG_VBLANK("IH: D4 vline\n");
8058 
8059 				break;
8060 			default:
8061 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8062 				break;
8063 			}
8064 			break;
8065 		case 5: /* D5 vblank/vline */
8066 			switch (src_data) {
8067 			case 0: /* D5 vblank */
8068 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
8069 					DRM_DEBUG_VBLANK("IH: IH event w/o asserted irq bit?\n");
8070 
8071 				if (rdev->irq.crtc_vblank_int[4]) {
8072 					drm_handle_vblank(rdev->ddev, 4);
8073 					rdev->pm.vblank_sync = true;
8074 					wake_up(&rdev->irq.vblank_queue);
8075 				}
8076 				if (atomic_read(&rdev->irq.pflip[4]))
8077 					radeon_crtc_handle_vblank(rdev, 4);
8078 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8079 				DRM_DEBUG_VBLANK("IH: D5 vblank\n");
8080 
8081 				break;
8082 			case 1: /* D5 vline */
8083 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
8084 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8085 
8086 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8087 				DRM_DEBUG("IH: D5 vline\n");
8088 
8089 				break;
8090 			default:
8091 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8092 				break;
8093 			}
8094 			break;
8095 		case 6: /* D6 vblank/vline */
8096 			switch (src_data) {
8097 			case 0: /* D6 vblank */
8098 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
8099 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8100 
8101 				if (rdev->irq.crtc_vblank_int[5]) {
8102 					drm_handle_vblank(rdev->ddev, 5);
8103 					rdev->pm.vblank_sync = true;
8104 					wake_up(&rdev->irq.vblank_queue);
8105 				}
8106 				if (atomic_read(&rdev->irq.pflip[5]))
8107 					radeon_crtc_handle_vblank(rdev, 5);
8108 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8109 				DRM_DEBUG_VBLANK("IH: D6 vblank\n");
8110 
8111 				break;
8112 			case 1: /* D6 vline */
8113 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
8114 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8115 
8116 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8117 				DRM_DEBUG_VBLANK("IH: D6 vline\n");
8118 
8119 				break;
8120 			default:
8121 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8122 				break;
8123 			}
8124 			break;
8125 		case 8: /* D1 page flip */
8126 		case 10: /* D2 page flip */
8127 		case 12: /* D3 page flip */
8128 		case 14: /* D4 page flip */
8129 		case 16: /* D5 page flip */
8130 		case 18: /* D6 page flip */
8131 			DRM_DEBUG_VBLANK("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8132 			if (radeon_use_pflipirq > 0)
8133 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8134 			break;
8135 		case 42: /* HPD hotplug */
8136 			switch (src_data) {
8137 			case 0:
8138 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
8139 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8140 
8141 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8142 				queue_hotplug = true;
8143 				DRM_DEBUG("IH: HPD1\n");
8144 
8145 				break;
8146 			case 1:
8147 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
8148 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8149 
8150 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8151 				queue_hotplug = true;
8152 				DRM_DEBUG("IH: HPD2\n");
8153 
8154 				break;
8155 			case 2:
8156 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
8157 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8158 
8159 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8160 				queue_hotplug = true;
8161 				DRM_DEBUG("IH: HPD3\n");
8162 
8163 				break;
8164 			case 3:
8165 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
8166 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8167 
8168 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8169 				queue_hotplug = true;
8170 				DRM_DEBUG("IH: HPD4\n");
8171 
8172 				break;
8173 			case 4:
8174 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
8175 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8176 
8177 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8178 				queue_hotplug = true;
8179 				DRM_DEBUG("IH: HPD5\n");
8180 
8181 				break;
8182 			case 5:
8183 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
8184 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8185 
8186 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8187 				queue_hotplug = true;
8188 				DRM_DEBUG("IH: HPD6\n");
8189 
8190 				break;
8191 			case 6:
8192 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
8193 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8194 
8195 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8196 				queue_dp = true;
8197 				DRM_DEBUG("IH: HPD_RX 1\n");
8198 
8199 				break;
8200 			case 7:
8201 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
8202 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8203 
8204 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8205 				queue_dp = true;
8206 				DRM_DEBUG("IH: HPD_RX 2\n");
8207 
8208 				break;
8209 			case 8:
8210 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
8211 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8212 
8213 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8214 				queue_dp = true;
8215 				DRM_DEBUG("IH: HPD_RX 3\n");
8216 
8217 				break;
8218 			case 9:
8219 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
8220 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8221 
8222 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8223 				queue_dp = true;
8224 				DRM_DEBUG("IH: HPD_RX 4\n");
8225 
8226 				break;
8227 			case 10:
8228 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
8229 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8230 
8231 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8232 				queue_dp = true;
8233 				DRM_DEBUG("IH: HPD_RX 5\n");
8234 
8235 				break;
8236 			case 11:
8237 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
8238 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8239 
8240 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8241 				queue_dp = true;
8242 				DRM_DEBUG("IH: HPD_RX 6\n");
8243 
8244 				break;
8245 			default:
8246 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8247 				break;
8248 			}
8249 			break;
8250 		case 96:
8251 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8252 			WREG32(SRBM_INT_ACK, 0x1);
8253 			break;
8254 		case 124: /* UVD */
8255 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8256 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8257 			break;
8258 		case 146:
8259 		case 147:
8260 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8261 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8262 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8263 			/* reset addr and status */
8264 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8265 			if (addr == 0x0 && status == 0x0)
8266 				break;
8267 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8268 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8269 				addr);
8270 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8271 				status);
8272 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8273 			break;
8274 		case 167: /* VCE */
8275 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8276 			switch (src_data) {
8277 			case 0:
8278 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8279 				break;
8280 			case 1:
8281 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8282 				break;
8283 			default:
8284 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8285 				break;
8286 			}
8287 			break;
8288 		case 176: /* GFX RB CP_INT */
8289 		case 177: /* GFX IB CP_INT */
8290 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8291 			break;
8292 		case 181: /* CP EOP event */
8293 			DRM_DEBUG("IH: CP EOP\n");
8294 			/* XXX check the bitfield order! */
8295 			me_id = (ring_id & 0x60) >> 5;
8296 			pipe_id = (ring_id & 0x18) >> 3;
8297 			queue_id = (ring_id & 0x7) >> 0;
8298 			switch (me_id) {
8299 			case 0:
8300 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8301 				break;
8302 			case 1:
8303 			case 2:
8304 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8305 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8306 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8307 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8308 				break;
8309 			}
8310 			break;
8311 		case 184: /* CP Privileged reg access */
8312 			DRM_ERROR("Illegal register access in command stream\n");
8313 			/* XXX check the bitfield order! */
8314 			me_id = (ring_id & 0x60) >> 5;
8315 			pipe_id = (ring_id & 0x18) >> 3;
8316 			queue_id = (ring_id & 0x7) >> 0;
8317 			switch (me_id) {
8318 			case 0:
8319 				/* This results in a full GPU reset, but all we need to do is soft
8320 				 * reset the CP for gfx
8321 				 */
8322 				queue_reset = true;
8323 				break;
8324 			case 1:
8325 				/* XXX compute */
8326 				queue_reset = true;
8327 				break;
8328 			case 2:
8329 				/* XXX compute */
8330 				queue_reset = true;
8331 				break;
8332 			}
8333 			break;
8334 		case 185: /* CP Privileged inst */
8335 			DRM_ERROR("Illegal instruction in command stream\n");
8336 			/* XXX check the bitfield order! */
8337 			me_id = (ring_id & 0x60) >> 5;
8338 			pipe_id = (ring_id & 0x18) >> 3;
8339 			queue_id = (ring_id & 0x7) >> 0;
8340 			switch (me_id) {
8341 			case 0:
8342 				/* This results in a full GPU reset, but all we need to do is soft
8343 				 * reset the CP for gfx
8344 				 */
8345 				queue_reset = true;
8346 				break;
8347 			case 1:
8348 				/* XXX compute */
8349 				queue_reset = true;
8350 				break;
8351 			case 2:
8352 				/* XXX compute */
8353 				queue_reset = true;
8354 				break;
8355 			}
8356 			break;
8357 		case 224: /* SDMA trap event */
8358 			/* XXX check the bitfield order! */
8359 			me_id = (ring_id & 0x3) >> 0;
8360 			queue_id = (ring_id & 0xc) >> 2;
8361 			DRM_DEBUG("IH: SDMA trap\n");
8362 			switch (me_id) {
8363 			case 0:
8364 				switch (queue_id) {
8365 				case 0:
8366 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8367 					break;
8368 				case 1:
8369 					/* XXX compute */
8370 					break;
8371 				case 2:
8372 					/* XXX compute */
8373 					break;
8374 				}
8375 				break;
8376 			case 1:
8377 				switch (queue_id) {
8378 				case 0:
8379 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8380 					break;
8381 				case 1:
8382 					/* XXX compute */
8383 					break;
8384 				case 2:
8385 					/* XXX compute */
8386 					break;
8387 				}
8388 				break;
8389 			}
8390 			break;
8391 		case 230: /* thermal low to high */
8392 			DRM_DEBUG("IH: thermal low to high\n");
8393 			rdev->pm.dpm.thermal.high_to_low = false;
8394 			queue_thermal = true;
8395 			break;
8396 		case 231: /* thermal high to low */
8397 			DRM_DEBUG("IH: thermal high to low\n");
8398 			rdev->pm.dpm.thermal.high_to_low = true;
8399 			queue_thermal = true;
8400 			break;
8401 		case 233: /* GUI IDLE */
8402 			DRM_DEBUG("IH: GUI idle\n");
8403 			break;
8404 		case 241: /* SDMA Privileged inst */
8405 		case 247: /* SDMA Privileged inst */
8406 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8407 			/* XXX check the bitfield order! */
8408 			me_id = (ring_id & 0x3) >> 0;
8409 			queue_id = (ring_id & 0xc) >> 2;
8410 			switch (me_id) {
8411 			case 0:
8412 				switch (queue_id) {
8413 				case 0:
8414 					queue_reset = true;
8415 					break;
8416 				case 1:
8417 					/* XXX compute */
8418 					queue_reset = true;
8419 					break;
8420 				case 2:
8421 					/* XXX compute */
8422 					queue_reset = true;
8423 					break;
8424 				}
8425 				break;
8426 			case 1:
8427 				switch (queue_id) {
8428 				case 0:
8429 					queue_reset = true;
8430 					break;
8431 				case 1:
8432 					/* XXX compute */
8433 					queue_reset = true;
8434 					break;
8435 				case 2:
8436 					/* XXX compute */
8437 					queue_reset = true;
8438 					break;
8439 				}
8440 				break;
8441 			}
8442 			break;
8443 		default:
8444 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8445 			break;
8446 		}
8447 
8448 		/* wptr/rptr are in bytes! */
8449 		rptr += 16;
8450 		rptr &= rdev->ih.ptr_mask;
8451 		WREG32(IH_RB_RPTR, rptr);
8452 	}
8453 	if (queue_dp)
8454 		schedule_work(&rdev->dp_work);
8455 	if (queue_hotplug)
8456 		schedule_delayed_work(&rdev->hotplug_work, 0);
8457 	if (queue_reset) {
8458 		rdev->needs_reset = true;
8459 		wake_up_all(&rdev->fence_queue);
8460 	}
8461 	if (queue_thermal)
8462 		schedule_work(&rdev->pm.dpm.thermal.work);
8463 	rdev->ih.rptr = rptr;
8464 	atomic_set(&rdev->ih.lock, 0);
8465 
8466 	/* make sure wptr hasn't changed while processing */
8467 	wptr = cik_get_ih_wptr(rdev);
8468 	if (wptr != rptr)
8469 		goto restart_ih;
8470 
8471 	return IRQ_HANDLED;
8472 }
8473 
8474 /*
8475  * startup/shutdown callbacks
8476  */
8477 /**
8478  * cik_startup - program the asic to a functional state
8479  *
8480  * @rdev: radeon_device pointer
8481  *
8482  * Programs the asic to a functional state (CIK).
8483  * Called by cik_init() and cik_resume().
8484  * Returns 0 for success, error for failure.
8485  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* MC microcode is only loaded by the driver on dGPUs; when DPM is
	 * already enabled, the SMC has taken care of it.
	 */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* IGPs need an asic-specific RLC save/restore register list;
		 * Kaveri (Spectre) and the other IGPs (Kalindi) differ.
		 */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on each ring: GFX, two compute (CP1/CP2),
	 * and two SDMA rings.  A failure here is fatal.
	 */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE are optional: on failure the ring size is zeroed so the
	 * ring-init code below skips them instead of aborting startup.
	 */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	r = radeon_vce_resume(rdev);
	if (!r) {
		r = vce_v2_0_resume(rdev);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE1_INDEX);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE2_INDEX);
	}
	if (r) {
		dev_err(rdev->dev, "VCE init error (%d).\n", r);
		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* pick the CP nop packet: Hawaii with old firmware needs type-2
	 * (CP_PACKET2) nops; everything else uses type-3 NOP packets.
	 */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD/VCE rings are only brought up if resume above succeeded
	 * (ring_size left non-zero); failures here are logged but
	 * do not abort startup.
	 */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = -ENOENT;

	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	/* r is still -ENOENT if both VCE rings were skipped; only report
	 * real failures.
	 */
	if (!r)
		r = vce_v1_0_init(rdev);
	else if (r != -ENOENT)
		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	r = radeon_kfd_resume(rdev);
	if (r)
		return r;

	return 0;
}
8731 
8732 /**
8733  * cik_resume - resume the asic to a functional state
8734  *
8735  * @rdev: radeon_device pointer
8736  *
8737  * Programs the asic to a functional state (CIK).
8738  * Called at resume.
8739  * Returns 0 for success, error for failure.
8740  */
int cik_resume(struct radeon_device *rdev)
{
	int r;

	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	cik_init_golden_registers(rdev);

	/* power management must be resumed before cik_startup() so DPM
	 * clocks are available to the startup path.
	 */
	if (rdev->pm.pm_method == PM_METHOD_DPM)
		radeon_pm_resume(rdev);

	/* accel_working must be set before cik_startup() and is cleared
	 * again if bring-up fails.
	 */
	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		DRM_ERROR("cik startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;

}
8765 
8766 /**
8767  * cik_suspend - suspend the asic
8768  *
8769  * @rdev: radeon_device pointer
8770  *
8771  * Bring the chip into a state suitable for suspend (CIK).
8772  * Called at suspend.
8773  * Returns 0 for success.
8774  */
int cik_suspend(struct radeon_device *rdev)
{
	/* shut engines down in roughly the reverse order of cik_startup():
	 * kfd/pm/audio first, then rings, then UVD/VCE, then PG/CG,
	 * and finally interrupts, writeback and the GART.
	 */
	radeon_kfd_suspend(rdev);
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8793 
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call the asic-specific functions. This
 * should also allow us to remove a bunch of callback functions
 * like vram_info.
 */
8800 /**
8801  * cik_init - asic specific driver and hw init
8802  *
8803  * @rdev: radeon_device pointer
8804  *
8805  * Setup asic specific driver variables and program the hw
8806  * to a functional state (CIK).
8807  * Called at driver startup.
8808  * Returns 0 for success, errors for failure.
8809  */
8810 int cik_init(struct radeon_device *rdev)
8811 {
8812 	struct radeon_ring *ring;
8813 	int r;
8814 
8815 	/* Read BIOS */
8816 	if (!radeon_get_bios(rdev)) {
8817 		if (ASIC_IS_AVIVO(rdev))
8818 			return -EINVAL;
8819 	}
8820 	/* Must be an ATOMBIOS */
8821 	if (!rdev->is_atom_bios) {
8822 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8823 		return -EINVAL;
8824 	}
8825 	r = radeon_atombios_init(rdev);
8826 	if (r)
8827 		return r;
8828 
8829 	/* Post card if necessary */
8830 	if (!radeon_card_posted(rdev)) {
8831 		if (!rdev->bios) {
8832 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8833 			return -EINVAL;
8834 		}
8835 		DRM_INFO("GPU not posted. posting now...\n");
8836 		atom_asic_init(rdev->mode_info.atom_context);
8837 	}
8838 	/* init golden registers */
8839 	cik_init_golden_registers(rdev);
8840 	/* Initialize scratch registers */
8841 	cik_scratch_init(rdev);
8842 	/* Initialize surface registers */
8843 	radeon_surface_init(rdev);
8844 	/* Initialize clocks */
8845 	radeon_get_clock_info(rdev->ddev);
8846 
8847 	/* Fence driver */
8848 	r = radeon_fence_driver_init(rdev);
8849 	if (r)
8850 		return r;
8851 
8852 	/* initialize memory controller */
8853 	r = cik_mc_init(rdev);
8854 	if (r)
8855 		return r;
8856 	/* Memory manager */
8857 	r = radeon_bo_init(rdev);
8858 	if (r)
8859 		return r;
8860 
8861 	if (rdev->flags & RADEON_IS_IGP) {
8862 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8863 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8864 			r = cik_init_microcode(rdev);
8865 			if (r) {
8866 				DRM_ERROR("Failed to load firmware!\n");
8867 				return r;
8868 			}
8869 		}
8870 	} else {
8871 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8872 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8873 		    !rdev->mc_fw) {
8874 			r = cik_init_microcode(rdev);
8875 			if (r) {
8876 				DRM_ERROR("Failed to load firmware!\n");
8877 				return r;
8878 			}
8879 		}
8880 	}
8881 
8882 	/* Initialize power management */
8883 	radeon_pm_init(rdev);
8884 
8885 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8886 	ring->ring_obj = NULL;
8887 	r600_ring_init(rdev, ring, 1024 * 1024);
8888 
8889 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8890 	ring->ring_obj = NULL;
8891 	r600_ring_init(rdev, ring, 1024 * 1024);
8892 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8893 	if (r)
8894 		return r;
8895 
8896 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8897 	ring->ring_obj = NULL;
8898 	r600_ring_init(rdev, ring, 1024 * 1024);
8899 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8900 	if (r)
8901 		return r;
8902 
8903 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8904 	ring->ring_obj = NULL;
8905 	r600_ring_init(rdev, ring, 256 * 1024);
8906 
8907 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8908 	ring->ring_obj = NULL;
8909 	r600_ring_init(rdev, ring, 256 * 1024);
8910 
8911 	r = radeon_uvd_init(rdev);
8912 	if (!r) {
8913 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8914 		ring->ring_obj = NULL;
8915 		r600_ring_init(rdev, ring, 4096);
8916 	}
8917 
8918 	r = radeon_vce_init(rdev);
8919 	if (!r) {
8920 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8921 		ring->ring_obj = NULL;
8922 		r600_ring_init(rdev, ring, 4096);
8923 
8924 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8925 		ring->ring_obj = NULL;
8926 		r600_ring_init(rdev, ring, 4096);
8927 	}
8928 
8929 	rdev->ih.ring_obj = NULL;
8930 	r600_ih_ring_init(rdev, 64 * 1024);
8931 
8932 	r = r600_pcie_gart_init(rdev);
8933 	if (r)
8934 		return r;
8935 
8936 	rdev->accel_working = true;
8937 	r = cik_startup(rdev);
8938 	if (r) {
8939 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8940 		cik_cp_fini(rdev);
8941 		cik_sdma_fini(rdev);
8942 		cik_irq_fini(rdev);
8943 		sumo_rlc_fini(rdev);
8944 		cik_mec_fini(rdev);
8945 		radeon_wb_fini(rdev);
8946 		radeon_ib_pool_fini(rdev);
8947 		radeon_vm_manager_fini(rdev);
8948 		radeon_irq_kms_fini(rdev);
8949 		cik_pcie_gart_fini(rdev);
8950 		rdev->accel_working = false;
8951 	}
8952 
8953 	/* Don't start up if the MC ucode is missing.
8954 	 * The default clocks and voltages before the MC ucode
8955 	 * is loaded are not suffient for advanced operations.
8956 	 */
8957 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8958 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8959 		return -EINVAL;
8960 	}
8961 
8962 	return 0;
8963 }
8964 
8965 /**
8966  * cik_fini - asic specific driver and hw fini
8967  *
8968  * @rdev: radeon_device pointer
8969  *
8970  * Tear down the asic specific driver variables and program the hw
8971  * to an idle state (CIK).
8972  * Called at driver unload.
8973  */
void cik_fini(struct radeon_device *rdev)
{
	/* tear down in roughly the reverse order of cik_init()/cik_startup();
	 * ordering matters: rings and engines go down before the memory
	 * pools, GART and BO manager they depend on.
	 */
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	cik_fini_microcode(rdev);
	/* BIOS copy was kmalloc'd in radeon_get_bios() */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
9001 
9002 void dce8_program_fmt(struct drm_encoder *encoder)
9003 {
9004 	struct drm_device *dev = encoder->dev;
9005 	struct radeon_device *rdev = dev->dev_private;
9006 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
9007 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
9008 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
9009 	int bpc = 0;
9010 	u32 tmp = 0;
9011 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
9012 
9013 	if (connector) {
9014 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
9015 		bpc = radeon_get_monitor_bpc(connector);
9016 		dither = radeon_connector->dither;
9017 	}
9018 
9019 	/* LVDS/eDP FMT is set up by atom */
9020 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
9021 		return;
9022 
9023 	/* not needed for analog */
9024 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
9025 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
9026 		return;
9027 
9028 	if (bpc == 0)
9029 		return;
9030 
9031 	switch (bpc) {
9032 	case 6:
9033 		if (dither == RADEON_FMT_DITHER_ENABLE)
9034 			/* XXX sort out optimal dither settings */
9035 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9036 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
9037 		else
9038 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
9039 		break;
9040 	case 8:
9041 		if (dither == RADEON_FMT_DITHER_ENABLE)
9042 			/* XXX sort out optimal dither settings */
9043 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9044 				FMT_RGB_RANDOM_ENABLE |
9045 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
9046 		else
9047 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
9048 		break;
9049 	case 10:
9050 		if (dither == RADEON_FMT_DITHER_ENABLE)
9051 			/* XXX sort out optimal dither settings */
9052 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9053 				FMT_RGB_RANDOM_ENABLE |
9054 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
9055 		else
9056 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
9057 		break;
9058 	default:
9059 		/* not needed */
9060 		break;
9061 	}
9062 
9063 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
9064 }
9065 
9066 /* display watermark setup */
9067 /**
9068  * dce8_line_buffer_adjust - Set up the line buffer
9069  *
9070  * @rdev: radeon_device pointer
9071  * @radeon_crtc: the selected display controller
9072  * @mode: the current display mode on the selected display
9073  * controller
9074  *
9075  * Setup up the line buffer allocation for
9076  * the selected display controller (CIK).
9077  * Returns the line buffer size in pixels.
9078  */
9079 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
9080 				   struct radeon_crtc *radeon_crtc,
9081 				   struct drm_display_mode *mode)
9082 {
9083 	u32 tmp, buffer_alloc, i;
9084 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
9085 	/*
9086 	 * Line Buffer Setup
9087 	 * There are 6 line buffers, one for each display controllers.
9088 	 * There are 3 partitions per LB. Select the number of partitions
9089 	 * to enable based on the display width.  For display widths larger
9090 	 * than 4096, you need use to use 2 display controllers and combine
9091 	 * them using the stereo blender.
9092 	 */
9093 	if (radeon_crtc->base.enabled && mode) {
9094 		if (mode->crtc_hdisplay < 1920) {
9095 			tmp = 1;
9096 			buffer_alloc = 2;
9097 		} else if (mode->crtc_hdisplay < 2560) {
9098 			tmp = 2;
9099 			buffer_alloc = 2;
9100 		} else if (mode->crtc_hdisplay < 4096) {
9101 			tmp = 0;
9102 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9103 		} else {
9104 			DRM_DEBUG_KMS("Mode too big for LB!\n");
9105 			tmp = 0;
9106 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9107 		}
9108 	} else {
9109 		tmp = 1;
9110 		buffer_alloc = 0;
9111 	}
9112 
9113 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9114 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9115 
9116 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9117 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9118 	for (i = 0; i < rdev->usec_timeout; i++) {
9119 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9120 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
9121 			break;
9122 		udelay(1);
9123 	}
9124 
9125 	if (radeon_crtc->base.enabled && mode) {
9126 		switch (tmp) {
9127 		case 0:
9128 		default:
9129 			return 4096 * 2;
9130 		case 1:
9131 			return 1920 * 2;
9132 		case 2:
9133 			return 2560 * 2;
9134 		}
9135 	}
9136 
9137 	/* controller not enabled, so no lb used */
9138 	return 0;
9139 }
9140 
9141 /**
9142  * cik_get_number_of_dram_channels - get the number of dram channels
9143  *
9144  * @rdev: radeon_device pointer
9145  *
9146  * Look up the number of video ram channels (CIK).
9147  * Used for display watermark bandwidth calculations
9148  * Returns the number of dram channels
9149  */
9150 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9151 {
9152 	u32 tmp = RREG32(MC_SHARED_CHMAP);
9153 
9154 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9155 	case 0:
9156 	default:
9157 		return 1;
9158 	case 1:
9159 		return 2;
9160 	case 2:
9161 		return 4;
9162 	case 3:
9163 		return 8;
9164 	case 4:
9165 		return 3;
9166 	case 5:
9167 		return 6;
9168 	case 6:
9169 		return 10;
9170 	case 7:
9171 		return 12;
9172 	case 8:
9173 		return 16;
9174 	}
9175 }
9176 
/* Input parameters for the DCE8 display watermark calculations below. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
9192 
9193 /**
9194  * dce8_dram_bandwidth - get the dram bandwidth
9195  *
9196  * @wm: watermark calculation data
9197  *
9198  * Calculate the raw dram bandwidth (CIK).
9199  * Used for display watermark bandwidth calculations
9200  * Returns the dram bandwidth in MBytes/s
9201  */
9202 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9203 {
9204 	/* Calculate raw DRAM Bandwidth */
9205 	fixed20_12 dram_efficiency; /* 0.7 */
9206 	fixed20_12 yclk, dram_channels, bandwidth;
9207 	fixed20_12 a;
9208 
9209 	a.full = dfixed_const(1000);
9210 	yclk.full = dfixed_const(wm->yclk);
9211 	yclk.full = dfixed_div(yclk, a);
9212 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9213 	a.full = dfixed_const(10);
9214 	dram_efficiency.full = dfixed_const(7);
9215 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9216 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9217 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9218 
9219 	return dfixed_trunc(bandwidth);
9220 }
9221 
9222 /**
9223  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9224  *
9225  * @wm: watermark calculation data
9226  *
9227  * Calculate the dram bandwidth used for display (CIK).
9228  * Used for display watermark bandwidth calculations
9229  * Returns the dram bandwidth for display in MBytes/s
9230  */
9231 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9232 {
9233 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9234 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9235 	fixed20_12 yclk, dram_channels, bandwidth;
9236 	fixed20_12 a;
9237 
9238 	a.full = dfixed_const(1000);
9239 	yclk.full = dfixed_const(wm->yclk);
9240 	yclk.full = dfixed_div(yclk, a);
9241 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9242 	a.full = dfixed_const(10);
9243 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9244 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9245 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9246 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9247 
9248 	return dfixed_trunc(bandwidth);
9249 }
9250 
9251 /**
9252  * dce8_data_return_bandwidth - get the data return bandwidth
9253  *
9254  * @wm: watermark calculation data
9255  *
9256  * Calculate the data return bandwidth used for display (CIK).
9257  * Used for display watermark bandwidth calculations
9258  * Returns the data return bandwidth in MBytes/s
9259  */
9260 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9261 {
9262 	/* Calculate the display Data return Bandwidth */
9263 	fixed20_12 return_efficiency; /* 0.8 */
9264 	fixed20_12 sclk, bandwidth;
9265 	fixed20_12 a;
9266 
9267 	a.full = dfixed_const(1000);
9268 	sclk.full = dfixed_const(wm->sclk);
9269 	sclk.full = dfixed_div(sclk, a);
9270 	a.full = dfixed_const(10);
9271 	return_efficiency.full = dfixed_const(8);
9272 	return_efficiency.full = dfixed_div(return_efficiency, a);
9273 	a.full = dfixed_const(32);
9274 	bandwidth.full = dfixed_mul(a, sclk);
9275 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9276 
9277 	return dfixed_trunc(bandwidth);
9278 }
9279 
9280 /**
9281  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9282  *
9283  * @wm: watermark calculation data
9284  *
9285  * Calculate the dmif bandwidth used for display (CIK).
9286  * Used for display watermark bandwidth calculations
9287  * Returns the dmif bandwidth in MBytes/s
9288  */
9289 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9290 {
9291 	/* Calculate the DMIF Request Bandwidth */
9292 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9293 	fixed20_12 disp_clk, bandwidth;
9294 	fixed20_12 a, b;
9295 
9296 	a.full = dfixed_const(1000);
9297 	disp_clk.full = dfixed_const(wm->disp_clk);
9298 	disp_clk.full = dfixed_div(disp_clk, a);
9299 	a.full = dfixed_const(32);
9300 	b.full = dfixed_mul(a, disp_clk);
9301 
9302 	a.full = dfixed_const(10);
9303 	disp_clk_request_efficiency.full = dfixed_const(8);
9304 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9305 
9306 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9307 
9308 	return dfixed_trunc(bandwidth);
9309 }
9310 
9311 /**
9312  * dce8_available_bandwidth - get the min available bandwidth
9313  *
9314  * @wm: watermark calculation data
9315  *
9316  * Calculate the min available bandwidth used for display (CIK).
9317  * Used for display watermark bandwidth calculations
9318  * Returns the min available bandwidth in MBytes/s
9319  */
9320 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9321 {
9322 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9323 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9324 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9325 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9326 
9327 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9328 }
9329 
9330 /**
9331  * dce8_average_bandwidth - get the average available bandwidth
9332  *
9333  * @wm: watermark calculation data
9334  *
9335  * Calculate the average available bandwidth used for display (CIK).
9336  * Used for display watermark bandwidth calculations
9337  * Returns the average available bandwidth in MBytes/s
9338  */
9339 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9340 {
9341 	/* Calculate the display mode Average Bandwidth
9342 	 * DisplayMode should contain the source and destination dimensions,
9343 	 * timing, etc.
9344 	 */
9345 	fixed20_12 bpp;
9346 	fixed20_12 line_time;
9347 	fixed20_12 src_width;
9348 	fixed20_12 bandwidth;
9349 	fixed20_12 a;
9350 
9351 	a.full = dfixed_const(1000);
9352 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9353 	line_time.full = dfixed_div(line_time, a);
9354 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9355 	src_width.full = dfixed_const(wm->src_width);
9356 	bandwidth.full = dfixed_mul(src_width, bpp);
9357 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9358 	bandwidth.full = dfixed_div(bandwidth, line_time);
9359 
9360 	return dfixed_trunc(bandwidth);
9361 }
9362 
9363 /**
9364  * dce8_latency_watermark - get the latency watermark
9365  *
9366  * @wm: watermark calculation data
9367  *
9368  * Calculate the latency watermark (CIK).
9369  * Used for display watermark bandwidth calculations
9370  * Returns the latency watermark in ns
9371  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	/* avoid the divisions below with no active heads */
	if (wm->num_heads == 0)
		return 0;

	/* downscaling, many vertical taps or interlace need more source
	 * lines buffered per destination line
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk), i.e. the rate
	 * at which the DMIF buffer could be drained during a memory stall
	 */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* cap by what the display clock can move: disp_clk/1000 * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to refill one destination line's worth of source lines */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line can't be refilled within the active period, the
	 * shortfall is added to the watermark
	 */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
9434 
9435 /**
9436  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9437  * average and available dram bandwidth
9438  *
9439  * @wm: watermark calculation data
9440  *
9441  * Check if the display average bandwidth fits in the display
9442  * dram bandwidth (CIK).
9443  * Used for display watermark bandwidth calculations
9444  * Returns true if the display fits, false if not.
9445  */
9446 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9447 {
9448 	if (dce8_average_bandwidth(wm) <=
9449 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9450 		return true;
9451 	else
9452 		return false;
9453 }
9454 
9455 /**
9456  * dce8_average_bandwidth_vs_available_bandwidth - check
9457  * average and available bandwidth
9458  *
9459  * @wm: watermark calculation data
9460  *
9461  * Check if the display average bandwidth fits in the display
9462  * available bandwidth (CIK).
9463  * Used for display watermark bandwidth calculations
9464  * Returns true if the display fits, false if not.
9465  */
9466 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9467 {
9468 	if (dce8_average_bandwidth(wm) <=
9469 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9470 		return true;
9471 	else
9472 		return false;
9473 }
9474 
9475 /**
9476  * dce8_check_latency_hiding - check latency hiding
9477  *
9478  * @wm: watermark calculation data
9479  *
9480  * Check latency hiding (CIK).
9481  * Used for display watermark bandwidth calculations
9482  * Returns true if the display fits, false if not.
9483  */
9484 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9485 {
9486 	u32 lb_partitions = wm->lb_size / wm->src_width;
9487 	u32 line_time = wm->active_time + wm->blank_time;
9488 	u32 latency_tolerant_lines;
9489 	u32 latency_hiding;
9490 	fixed20_12 a;
9491 
9492 	a.full = dfixed_const(1);
9493 	if (wm->vsc.full > a.full)
9494 		latency_tolerant_lines = 1;
9495 	else {
9496 		if (lb_partitions <= (wm->vtaps + 1))
9497 			latency_tolerant_lines = 1;
9498 		else
9499 			latency_tolerant_lines = 2;
9500 	}
9501 
9502 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9503 
9504 	if (dce8_latency_watermark(wm) <= latency_hiding)
9505 		return true;
9506 	else
9507 		return false;
9508 }
9509 
/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).  Watermark A is computed
 * for the high (performance) clocks, watermark B for the low
 * (power-saving) clocks; both are written to the DPG latency
 * registers and cached on the crtc for DPM use.
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	/* default to 0 when the crtc is disabled (no mode to compute from) */
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* mode->clock is the pixel clock in kHz, so this is the
		 * pixel period in ns; line_time is clamped to the 16-bit
		 * LATENCY_HIGH_WATERMARK register field */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* dpm active: query the non-low (high) clock levels */
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		/* blanking is whatever remains of the line after active scanout */
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* dpm active: query the low clock levels */
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
9645 
9646 /**
9647  * dce8_bandwidth_update - program display watermarks
9648  *
9649  * @rdev: radeon_device pointer
9650  *
9651  * Calculate and program the display watermarks and line
9652  * buffer allocation (CIK).
9653  */
9654 void dce8_bandwidth_update(struct radeon_device *rdev)
9655 {
9656 	struct drm_display_mode *mode = NULL;
9657 	u32 num_heads = 0, lb_size;
9658 	int i;
9659 
9660 	if (!rdev->mode_info.mode_config_initialized)
9661 		return;
9662 
9663 	radeon_update_display_priority(rdev);
9664 
9665 	for (i = 0; i < rdev->num_crtc; i++) {
9666 		if (rdev->mode_info.crtcs[i]->base.enabled)
9667 			num_heads++;
9668 	}
9669 	for (i = 0; i < rdev->num_crtc; i++) {
9670 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9671 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9672 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9673 	}
9674 }
9675 
/**
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (CIK).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	/* the latch + two-register read must not interleave with another
	 * capture, hence the mutex */
	mutex_lock(&rdev->gpu_clock_mutex);
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}
9695 
9696 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9697                               u32 cntl_reg, u32 status_reg)
9698 {
9699 	int r, i;
9700 	struct atom_clock_dividers dividers;
9701 	uint32_t tmp;
9702 
9703 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9704 					   clock, false, &dividers);
9705 	if (r)
9706 		return r;
9707 
9708 	tmp = RREG32_SMC(cntl_reg);
9709 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9710 	tmp |= dividers.post_divider;
9711 	WREG32_SMC(cntl_reg, tmp);
9712 
9713 	for (i = 0; i < 100; i++) {
9714 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9715 			break;
9716 		mdelay(10);
9717 	}
9718 	if (i == 100)
9719 		return -ETIMEDOUT;
9720 
9721 	return 0;
9722 }
9723 
9724 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9725 {
9726 	int r = 0;
9727 
9728 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9729 	if (r)
9730 		return r;
9731 
9732 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9733 	return r;
9734 }
9735 
9736 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9737 {
9738 	int r, i;
9739 	struct atom_clock_dividers dividers;
9740 	u32 tmp;
9741 
9742 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9743 					   ecclk, false, &dividers);
9744 	if (r)
9745 		return r;
9746 
9747 	for (i = 0; i < 100; i++) {
9748 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9749 			break;
9750 		mdelay(10);
9751 	}
9752 	if (i == 100)
9753 		return -ETIMEDOUT;
9754 
9755 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9756 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9757 	tmp |= dividers.post_divider;
9758 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9759 
9760 	for (i = 0; i < 100; i++) {
9761 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9762 			break;
9763 		mdelay(10);
9764 	}
9765 	if (i == 100)
9766 		return -ETIMEDOUT;
9767 
9768 	return 0;
9769 }
9770 
/**
 * cik_pcie_gen3_enable - retrain the PCIE link to gen2/gen3 speeds
 *
 * @rdev: radeon_device pointer
 *
 * If the platform advertises gen2/gen3 support, re-runs link
 * equalization where needed and initiates a link speed change.
 * No-op for IGPs, non-PCIE parts, or when disabled via the
 * radeon.pcie_gen2 module parameter.  The exact ordering of the
 * config-space and PCIE-port register accesses below follows the
 * hardware retraining procedure and must not be reordered.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

#if 0
	if (pci_is_root_bus(rdev->pdev->bus))
		return;
#endif

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if neither gen2 nor gen3 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3 (see checks below) */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* locate the PCIe capability blocks on the bridge and the GPU */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the current link control settings so the HAWD
			 * bits can be restored after retraining */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to the detected maximum if it
			 * is currently operating narrower */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then request equalization redo */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* target link speed lives in the low nibble of LNKCTL2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hw to clear the initiate bit (speed change done) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9932 
/**
 * cik_program_aspm - program ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Configures L0s/L1 link power-state inactivity timers and, when
 * L1 is enabled, PLL power-down behaviour while in L1.  Skipped
 * for IGPs, non-PCIE parts, or when disabled via the radeon.aspm
 * module parameter.  Each register is only written back when the
 * computed value actually differs from what was read (orig/data
 * pattern).
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* local policy knobs; all features currently left enabled */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
#if 0
	bool disable_clkreq = false;
#endif

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* start from cleared L0s/L1 inactivity timers; PMI-to-L1 is
	 * disabled unless L1 gets enabled below */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PLLs to power down in the OFF and TXS2
			 * states on both PIF blocks */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

#ifdef zMN_TODO
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}
#else
			/* CLKREQ detection is not ported on this platform
			 * (zMN_TODO); the clk_req_support block below is
			 * therefore currently dead code */
			clk_req_support = false;
#endif

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: just write back the L0s/PMI settings */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* drop the L0s inactivity timer again if the N_FTS field is
		 * saturated and the link is reversed in both directions */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
10087