1 /*	$NetBSD: radeon_cik.c,v 1.7 2022/10/17 03:05:32 mrg Exp $	*/
2 
3 /*
4  * Copyright 2012 Advanced Micro Devices, Inc.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Alex Deucher
25  */
26 
27 #include <sys/cdefs.h>
28 __KERNEL_RCSID(0, "$NetBSD: radeon_cik.c,v 1.7 2022/10/17 03:05:32 mrg Exp $");
29 
30 #include <linux/firmware.h>
31 #include <linux/module.h>
32 #include <linux/pci.h>
33 #include <linux/slab.h>
34 
35 #include <drm/drm_vblank.h>
36 
37 #include "atom.h"
38 #include "cik_blit_shaders.h"
39 #include "cikd.h"
40 #include "clearstate_ci.h"
41 #include "radeon.h"
42 #include "radeon_asic.h"
43 #include "radeon_audio.h"
44 #include "radeon_ucode.h"
45 
46 #define SH_MEM_CONFIG_GFX_DEFAULT \
47 	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
48 
49 #include <linux/nbsd-namespace.h>
50 
51 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
54 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
55 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
56 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
57 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
58 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
59 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
60 
61 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
62 MODULE_FIRMWARE("radeon/bonaire_me.bin");
63 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
64 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
65 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
66 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
67 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
68 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
69 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
70 
71 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
72 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
73 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
74 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
75 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
76 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
77 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
78 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
79 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
80 
81 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
82 MODULE_FIRMWARE("radeon/hawaii_me.bin");
83 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
84 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
85 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
86 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
87 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
88 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
89 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
90 
91 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
92 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
93 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
94 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
95 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
96 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
97 
98 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
99 MODULE_FIRMWARE("radeon/kaveri_me.bin");
100 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
101 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
102 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
103 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
104 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
105 
106 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
107 MODULE_FIRMWARE("radeon/KABINI_me.bin");
108 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
109 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
110 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
111 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
112 
113 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
114 MODULE_FIRMWARE("radeon/kabini_me.bin");
115 MODULE_FIRMWARE("radeon/kabini_ce.bin");
116 MODULE_FIRMWARE("radeon/kabini_mec.bin");
117 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
118 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
119 
120 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
121 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
122 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
123 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
124 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
125 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
126 
127 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
128 MODULE_FIRMWARE("radeon/mullins_me.bin");
129 MODULE_FIRMWARE("radeon/mullins_ce.bin");
130 MODULE_FIRMWARE("radeon/mullins_mec.bin");
131 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
132 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
133 
134 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
135 extern void r600_ih_ring_fini(struct radeon_device *rdev);
136 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
137 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
138 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
139 extern void sumo_rlc_fini(struct radeon_device *rdev);
140 extern int sumo_rlc_init(struct radeon_device *rdev);
141 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
142 extern void si_rlc_reset(struct radeon_device *rdev);
143 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
144 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
145 extern int cik_sdma_resume(struct radeon_device *rdev);
146 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
147 extern void cik_sdma_fini(struct radeon_device *rdev);
148 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
149 static void cik_rlc_stop(struct radeon_device *rdev);
150 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
151 static void cik_program_aspm(struct radeon_device *rdev);
152 static void cik_init_pg(struct radeon_device *rdev);
153 static void cik_init_cg(struct radeon_device *rdev);
154 static void cik_fini_pg(struct radeon_device *rdev);
155 static void cik_fini_cg(struct radeon_device *rdev);
156 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
157 					  bool enable);
158 
159 /**
160  * cik_get_allowed_info_register - fetch the register for the info ioctl
161  *
162  * @rdev: radeon_device pointer
163  * @reg: register offset in bytes
164  * @val: register value
165  *
166  * Returns 0 for success or -EINVAL for an invalid register
167  *
168  */
/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Only a small whitelist of status registers may be read through the
 * info ioctl; anything else is rejected without touching the hardware.
 *
 * Returns 0 for success or -EINVAL for an invalid register
 *
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	bool allowed;

	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		allowed = true;
		break;
	default:
		allowed = false;
		break;
	}

	if (!allowed)
		return -EINVAL;

	*val = RREG32(reg);
	return 0;
}
191 
192 /*
193  * Indirect registers accessor
194  */
/* Read a DIDT register through the CIK_DIDT_IND_INDEX/DATA pair.
 * The spinlock keeps the index write and the data read atomic with
 * respect to other users of the same index/data window. */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 val;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, reg);
	val = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);

	return val;
}
206 
/* Write a DIDT register through the CIK_DIDT_IND_INDEX/DATA pair,
 * holding the index lock across the two-register sequence. */
void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, reg);
	WREG32(CIK_DIDT_IND_DATA, v);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}
216 
217 /* get temperature in millidegrees */
ci_get_temp(struct radeon_device * rdev)218 int ci_get_temp(struct radeon_device *rdev)
219 {
220 	u32 temp;
221 	int actual_temp = 0;
222 
223 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
224 		CTF_TEMP_SHIFT;
225 
226 	if (temp & 0x200)
227 		actual_temp = 255;
228 	else
229 		actual_temp = temp & 0x1ff;
230 
231 	return actual_temp * 1000;
232 }
233 
234 /* get temperature in millidegrees */
kv_get_temp(struct radeon_device * rdev)235 int kv_get_temp(struct radeon_device *rdev)
236 {
237 	u32 temp;
238 	int actual_temp = 0;
239 
240 	temp = RREG32_SMC(0xC0300E0C);
241 
242 	if (temp)
243 		actual_temp = (temp / 8) - 49;
244 	else
245 		actual_temp = 0;
246 
247 	return actual_temp * 1000;
248 }
249 
250 /*
251  * Indirect registers accessor
252  */
/* Read a PCIE port register via the PCIE_INDEX/PCIE_DATA pair.
 * The index register is read back before the data access, matching
 * the write-side sequence, and the whole sequence is serialized by
 * the pciep index lock. */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 val;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	val = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);

	return val;
}
265 
/* Write a PCIE port register via the PCIE_INDEX/PCIE_DATA pair.
 * Both the index and data writes are followed by a read-back of the
 * same register; the pciep index lock serializes the sequence. */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
277 
/*
 * RLC save/restore register list for Spectre (Kaveri GFX) parts.
 * Entries appear to be ((broadcast/instance selector << 16) |
 * (register byte offset >> 2)) words, most followed by a 0x00000000
 * placeholder, with bare count words (0x3, 0x5) introducing the
 * trailing sections.  NOTE(review): layout inferred from the data
 * shape only -- confirm against the RLC list-programming code, which
 * is not visible in this chunk.  Do not edit by hand.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3, /* section marker -- presumably a count; confirm with consumer */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5, /* section marker -- presumably a count; confirm with consumer */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
724 
/*
 * RLC save/restore register list for Kalindi (Kabini/Mullins) parts.
 * Same apparent encoding as spectre_rlc_save_restore_register_list
 * above: ((selector << 16) | (register byte offset >> 2)) words, most
 * followed by a 0x00000000 placeholder, with bare count words (0x3,
 * 0x5) introducing the trailing sections.  NOTE(review): layout
 * inferred from the data shape only -- confirm against the RLC
 * list-programming code.  Do not edit by hand.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3, /* section marker -- presumably a count; confirm with consumer */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5, /* section marker -- presumably a count; confirm with consumer */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
1049 
/*
 * Golden (recommended power-on) SPM settings for Bonaire.
 * Rows look like { register offset, mask, value } triplets --
 * NOTE(review): format inferred from the data shape; confirm against
 * the golden-register programming helper (not visible in this chunk).
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1054 
/*
 * Golden settings common to Bonaire boards.
 * Rows look like { register offset, mask, value } triplets --
 * NOTE(review): format inferred from the data shape; confirm against
 * the golden-register programming helper (not visible in this chunk).
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1062 
/*
 * Golden register settings for Bonaire.
 * Rows look like { register offset, mask, value } triplets --
 * NOTE(review): format inferred from the data shape; confirm against
 * the golden-register programming helper (not visible in this chunk).
 * Do not edit by hand.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1107 
/* Bonaire medium/coarse-grain clock-gating init sequence
 * ({reg, mask, value} triplets). */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1193 
/* Spectre (Kaveri) SPM golden settings ({reg, mask, value} triplets). */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1198 
/* Spectre (Kaveri) common golden registers ({reg, mask, value} triplets). */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1206 
/* Spectre (Kaveri) ASIC-specific golden registers ({reg, mask, value}). */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1235 
/* Spectre (Kaveri) clock-gating init sequence ({reg, mask, value}). */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1326 
/* Kalindi (Kabini) SPM golden settings ({reg, mask, value} triplets). */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1331 
/* Kalindi (Kabini) common golden registers ({reg, mask, value} triplets). */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1339 
/* Kalindi (Kabini) ASIC-specific golden registers ({reg, mask, value}). */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1373 
/* Kalindi (Kabini/Mullins) clock-gating init sequence ({reg, mask, value}). */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1432 
/* Hawaii SPM golden settings ({reg, mask, value} triplets). */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1437 
/* Hawaii common golden registers ({reg, mask, value} triplets). */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1446 
/* Hawaii ASIC-specific golden registers ({reg, mask, value} triplets). */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1486 
/* Hawaii clock-gating init sequence ({reg, mask, value} triplets). */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1597 
/* Godavari (Mullins) ASIC-specific golden registers ({reg, mask, value}). */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 is not dword-aligned and the sibling tables
	 * use 0x9834 with this mask/value; matches upstream Linux verbatim,
	 * but looks like an upstream typo -- verify before "fixing". */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1633 
1634 
cik_init_golden_registers(struct radeon_device * rdev)1635 static void cik_init_golden_registers(struct radeon_device *rdev)
1636 {
1637 	switch (rdev->family) {
1638 	case CHIP_BONAIRE:
1639 		radeon_program_register_sequence(rdev,
1640 						 bonaire_mgcg_cgcg_init,
1641 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1642 		radeon_program_register_sequence(rdev,
1643 						 bonaire_golden_registers,
1644 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1645 		radeon_program_register_sequence(rdev,
1646 						 bonaire_golden_common_registers,
1647 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1648 		radeon_program_register_sequence(rdev,
1649 						 bonaire_golden_spm_registers,
1650 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1651 		break;
1652 	case CHIP_KABINI:
1653 		radeon_program_register_sequence(rdev,
1654 						 kalindi_mgcg_cgcg_init,
1655 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1656 		radeon_program_register_sequence(rdev,
1657 						 kalindi_golden_registers,
1658 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1659 		radeon_program_register_sequence(rdev,
1660 						 kalindi_golden_common_registers,
1661 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1662 		radeon_program_register_sequence(rdev,
1663 						 kalindi_golden_spm_registers,
1664 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1665 		break;
1666 	case CHIP_MULLINS:
1667 		radeon_program_register_sequence(rdev,
1668 						 kalindi_mgcg_cgcg_init,
1669 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1670 		radeon_program_register_sequence(rdev,
1671 						 godavari_golden_registers,
1672 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1673 		radeon_program_register_sequence(rdev,
1674 						 kalindi_golden_common_registers,
1675 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1676 		radeon_program_register_sequence(rdev,
1677 						 kalindi_golden_spm_registers,
1678 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1679 		break;
1680 	case CHIP_KAVERI:
1681 		radeon_program_register_sequence(rdev,
1682 						 spectre_mgcg_cgcg_init,
1683 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1684 		radeon_program_register_sequence(rdev,
1685 						 spectre_golden_registers,
1686 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1687 		radeon_program_register_sequence(rdev,
1688 						 spectre_golden_common_registers,
1689 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1690 		radeon_program_register_sequence(rdev,
1691 						 spectre_golden_spm_registers,
1692 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1693 		break;
1694 	case CHIP_HAWAII:
1695 		radeon_program_register_sequence(rdev,
1696 						 hawaii_mgcg_cgcg_init,
1697 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1698 		radeon_program_register_sequence(rdev,
1699 						 hawaii_golden_registers,
1700 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1701 		radeon_program_register_sequence(rdev,
1702 						 hawaii_golden_common_registers,
1703 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1704 		radeon_program_register_sequence(rdev,
1705 						 hawaii_golden_spm_registers,
1706 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1707 		break;
1708 	default:
1709 		break;
1710 	}
1711 }
1712 
1713 /**
1714  * cik_get_xclk - get the xclk
1715  *
1716  * @rdev: radeon_device pointer
1717  *
1718  * Returns the reference clock used by the gfx engine
1719  * (CIK).
1720  */
cik_get_xclk(struct radeon_device * rdev)1721 u32 cik_get_xclk(struct radeon_device *rdev)
1722 {
1723 	u32 reference_clock = rdev->clock.spll.reference_freq;
1724 
1725 	if (rdev->flags & RADEON_IS_IGP) {
1726 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1727 			return reference_clock / 2;
1728 	} else {
1729 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1730 			return reference_clock / 4;
1731 	}
1732 	return reference_clock;
1733 }
1734 
1735 /**
1736  * cik_mm_rdoorbell - read a doorbell dword
1737  *
1738  * @rdev: radeon_device pointer
1739  * @index: doorbell index
1740  *
1741  * Returns the value in the doorbell aperture at the
1742  * requested doorbell index (CIK).
1743  */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	/* Reject out-of-range reads up front; 0 is the error value. */
	if (index >= rdev->doorbell.num_doorbells) {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
#ifdef __NetBSD__
	/* index counts 32-bit doorbells, hence the *4 byte offset. */
	return bus_space_read_4(rdev->doorbell.bst, rdev->doorbell.bsh,
	    index*4);
#else
	return readl(rdev->doorbell.ptr + index);
#endif
}
1758 
1759 /**
1760  * cik_mm_wdoorbell - write a doorbell dword
1761  *
1762  * @rdev: radeon_device pointer
1763  * @index: doorbell index
1764  * @v: value to write
1765  *
1766  * Writes @v to the doorbell aperture at the
1767  * requested doorbell index (CIK).
1768  */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	/* Drop out-of-range writes with a diagnostic, matching the reader. */
	if (index >= rdev->doorbell.num_doorbells) {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
		return;
	}
#ifdef __NetBSD__
	/* index counts 32-bit doorbells, hence the *4 byte offset. */
	bus_space_write_4(rdev->doorbell.bst, rdev->doorbell.bsh,
	    index*4, v);
#else
	writel(v, rdev->doorbell.ptr + index);
#endif
}
1782 
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC IO debug register pairs: {MC_SEQ_IO_DEBUG_INDEX value,
 * MC_SEQ_IO_DEBUG_DATA value}, written by ci_mc_load_microcode() when
 * using legacy (non-header) MC firmware. */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1824 
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC IO debug register pairs: {MC_SEQ_IO_DEBUG_INDEX value,
 * MC_SEQ_IO_DEBUG_DATA value}, written by ci_mc_load_microcode() when
 * using legacy (non-header) MC firmware. */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1852 
1853 
1854 /**
1855  * cik_srbm_select - select specific register instances
1856  *
1857  * @rdev: radeon_device pointer
1858  * @me: selected ME (micro engine)
1859  * @pipe: pipe
1860  * @queue: queue
1861  * @vmid: VMID
1862  *
1863  * Switches the currently active registers instances.  Some
1864  * registers are instanced per VMID, others are instanced per
1865  * me/pipe/queue combination.
1866  */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	/* Pack the instance selectors (masked to their field widths) and
	 * latch them into SRBM_GFX_CNTL in a single write. */
	WREG32(SRBM_GFX_CNTL,
	       PIPEID(pipe & 0x3) |
	       MEID(me & 0x3) |
	       VMID(vmid & 0xf) |
	       QUEUEID(queue & 0x7));
}
1876 
1877 /* ucode loading */
1878 /**
1879  * ci_mc_load_microcode - load MC ucode into the hw
1880  *
1881  * @rdev: radeon_device pointer
1882  *
1883  * Load the GDDR MC ucode into the hw (CIK).
1884  * Returns 0 on success, error on failure.
1885  */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running, tmp;
	const u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	/* No MC firmware loaded -> nothing to program. */
	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* New-style firmware: a header in the image describes where
		 * the IO debug pairs and the ucode payload live. */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io_debug payload is {index, data} pairs of 32-bit words,
		 * hence bytes / (4 * 2). */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* Legacy firmware: raw big-endian ucode blob plus a
		 * built-in per-ASIC IO debug register table. */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = &bonaire_io_mc_regs[0][0];
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = &hawaii_io_mc_regs[0][0];
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			/* Only Bonaire/Hawaii carry discrete-MC ucode. */
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* Only (re)load when the MC sequencer is not already running. */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs: each pair is an index write followed by
		 * a data write through the IO debug port. */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		tmp = RREG32(MC_SEQ_MISC0);
		/* Board-specific quirk for PCI device 0x6649 with this
		 * MC_SEQ_MISC0 revision -- presumably a memory-vendor
		 * tuning override; verify against AMD errata if touched. */
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode: new firmware is little-endian,
		 * legacy blobs are big-endian. */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete (D0 then D1), polling up
		 * to usec_timeout microseconds each; a timeout here is
		 * deliberately not treated as an error. */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1982 
1983 /**
1984  * cik_init_microcode - load ucode images from disk
1985  *
1986  * @rdev: radeon_device pointer
1987  *
1988  * Use the firmware interface to load the ucode images into
1989  * the driver (not loaded into hw).
1990  * Returns 0 on success, error on failure.
1991  */
cik_init_microcode(struct radeon_device * rdev)1992 static int cik_init_microcode(struct radeon_device *rdev)
1993 {
1994 	const char *chip_name;
1995 	const char *new_chip_name;
1996 	size_t pfp_req_size, me_req_size, ce_req_size,
1997 		mec_req_size, rlc_req_size, mc_req_size = 0,
1998 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1999 	char fw_name[30];
2000 	int new_fw = 0;
2001 	int err;
2002 	int num_fw;
2003 	bool new_smc = false;
2004 
2005 	DRM_DEBUG("\n");
2006 
2007 	switch (rdev->family) {
2008 	case CHIP_BONAIRE:
2009 		chip_name = "BONAIRE";
2010 		if ((rdev->pdev->revision == 0x80) ||
2011 		    (rdev->pdev->revision == 0x81) ||
2012 		    (rdev->pdev->device == 0x665f))
2013 			new_smc = true;
2014 		new_chip_name = "bonaire";
2015 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2016 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2017 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2018 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2019 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2020 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2021 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2022 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2023 		smc_req_size = round_up(BONAIRE_SMC_UCODE_SIZE, 4);
2024 		num_fw = 8;
2025 		break;
2026 	case CHIP_HAWAII:
2027 		chip_name = "HAWAII";
2028 		if (rdev->pdev->revision == 0x80)
2029 			new_smc = true;
2030 		new_chip_name = "hawaii";
2031 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2032 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2033 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2034 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2035 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2036 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2037 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2038 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2039 		smc_req_size = round_up(HAWAII_SMC_UCODE_SIZE, 4);
2040 		num_fw = 8;
2041 		break;
2042 	case CHIP_KAVERI:
2043 		chip_name = "KAVERI";
2044 		new_chip_name = "kaveri";
2045 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2046 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2047 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2048 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2049 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2050 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2051 		num_fw = 7;
2052 		break;
2053 	case CHIP_KABINI:
2054 		chip_name = "KABINI";
2055 		new_chip_name = "kabini";
2056 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2057 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2058 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2059 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2060 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2061 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2062 		num_fw = 6;
2063 		break;
2064 	case CHIP_MULLINS:
2065 		chip_name = "MULLINS";
2066 		new_chip_name = "mullins";
2067 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2068 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2069 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2070 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2071 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2072 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2073 		num_fw = 6;
2074 		break;
2075 	default: BUG();
2076 	}
2077 
2078 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2079 
2080 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2081 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2082 	if (err) {
2083 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2084 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2085 		if (err)
2086 			goto out;
2087 		if (rdev->pfp_fw->size != pfp_req_size) {
2088 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2089 			       rdev->pfp_fw->size, fw_name);
2090 			err = -EINVAL;
2091 			goto out;
2092 		}
2093 	} else {
2094 		err = radeon_ucode_validate(rdev->pfp_fw);
2095 		if (err) {
2096 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2097 			       fw_name);
2098 			goto out;
2099 		} else {
2100 			new_fw++;
2101 		}
2102 	}
2103 
2104 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2105 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2106 	if (err) {
2107 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2108 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2109 		if (err)
2110 			goto out;
2111 		if (rdev->me_fw->size != me_req_size) {
2112 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2113 			       rdev->me_fw->size, fw_name);
2114 			err = -EINVAL;
2115 		}
2116 	} else {
2117 		err = radeon_ucode_validate(rdev->me_fw);
2118 		if (err) {
2119 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2120 			       fw_name);
2121 			goto out;
2122 		} else {
2123 			new_fw++;
2124 		}
2125 	}
2126 
2127 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2128 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2129 	if (err) {
2130 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2131 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2132 		if (err)
2133 			goto out;
2134 		if (rdev->ce_fw->size != ce_req_size) {
2135 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2136 			       rdev->ce_fw->size, fw_name);
2137 			err = -EINVAL;
2138 		}
2139 	} else {
2140 		err = radeon_ucode_validate(rdev->ce_fw);
2141 		if (err) {
2142 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2143 			       fw_name);
2144 			goto out;
2145 		} else {
2146 			new_fw++;
2147 		}
2148 	}
2149 
2150 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2151 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2152 	if (err) {
2153 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2154 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2155 		if (err)
2156 			goto out;
2157 		if (rdev->mec_fw->size != mec_req_size) {
2158 			pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2159 			       rdev->mec_fw->size, fw_name);
2160 			err = -EINVAL;
2161 		}
2162 	} else {
2163 		err = radeon_ucode_validate(rdev->mec_fw);
2164 		if (err) {
2165 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2166 			       fw_name);
2167 			goto out;
2168 		} else {
2169 			new_fw++;
2170 		}
2171 	}
2172 
2173 	if (rdev->family == CHIP_KAVERI) {
2174 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2175 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2176 		if (err) {
2177 			goto out;
2178 		} else {
2179 			err = radeon_ucode_validate(rdev->mec2_fw);
2180 			if (err) {
2181 				goto out;
2182 			} else {
2183 				new_fw++;
2184 			}
2185 		}
2186 	}
2187 
2188 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2189 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2190 	if (err) {
2191 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2192 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2193 		if (err)
2194 			goto out;
2195 		if (rdev->rlc_fw->size != rlc_req_size) {
2196 			pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2197 			       rdev->rlc_fw->size, fw_name);
2198 			err = -EINVAL;
2199 		}
2200 	} else {
2201 		err = radeon_ucode_validate(rdev->rlc_fw);
2202 		if (err) {
2203 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2204 			       fw_name);
2205 			goto out;
2206 		} else {
2207 			new_fw++;
2208 		}
2209 	}
2210 
2211 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2212 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2213 	if (err) {
2214 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2215 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2216 		if (err)
2217 			goto out;
2218 		if (rdev->sdma_fw->size != sdma_req_size) {
2219 			pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2220 			       rdev->sdma_fw->size, fw_name);
2221 			err = -EINVAL;
2222 		}
2223 	} else {
2224 		err = radeon_ucode_validate(rdev->sdma_fw);
2225 		if (err) {
2226 			pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2227 			       fw_name);
2228 			goto out;
2229 		} else {
2230 			new_fw++;
2231 		}
2232 	}
2233 
2234 	/* No SMC, MC ucode on APUs */
2235 	if (!(rdev->flags & RADEON_IS_IGP)) {
2236 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2237 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2238 		if (err) {
2239 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2240 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2241 			if (err) {
2242 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2243 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2244 				if (err)
2245 					goto out;
2246 			}
2247 			if ((rdev->mc_fw->size != mc_req_size) &&
2248 			    (rdev->mc_fw->size != mc2_req_size)){
2249 				pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2250 				       rdev->mc_fw->size, fw_name);
2251 				err = -EINVAL;
2252 			}
2253 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2254 		} else {
2255 			err = radeon_ucode_validate(rdev->mc_fw);
2256 			if (err) {
2257 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2258 				       fw_name);
2259 				goto out;
2260 			} else {
2261 				new_fw++;
2262 			}
2263 		}
2264 
2265 		if (new_smc)
2266 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2267 		else
2268 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2269 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2270 		if (err) {
2271 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2272 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2273 			if (err) {
2274 				pr_err("smc: error loading firmware \"%s\"\n",
2275 				       fw_name);
2276 				release_firmware(rdev->smc_fw);
2277 				rdev->smc_fw = NULL;
2278 				err = 0;
2279 			} else if (rdev->smc_fw->size != smc_req_size) {
2280 				pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2281 				       rdev->smc_fw->size, fw_name);
2282 				err = -EINVAL;
2283 			}
2284 		} else {
2285 			err = radeon_ucode_validate(rdev->smc_fw);
2286 			if (err) {
2287 				pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2288 				       fw_name);
2289 				goto out;
2290 			} else {
2291 				new_fw++;
2292 			}
2293 		}
2294 	}
2295 
2296 	if (new_fw == 0) {
2297 		rdev->new_fw = false;
2298 	} else if (new_fw < num_fw) {
2299 		pr_err("ci_fw: mixing new and old firmware!\n");
2300 		err = -EINVAL;
2301 	} else {
2302 		rdev->new_fw = true;
2303 	}
2304 
2305 out:
2306 	if (err) {
2307 		if (err != -EINVAL)
2308 			pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2309 			       fw_name);
2310 		release_firmware(rdev->pfp_fw);
2311 		rdev->pfp_fw = NULL;
2312 		release_firmware(rdev->me_fw);
2313 		rdev->me_fw = NULL;
2314 		release_firmware(rdev->ce_fw);
2315 		rdev->ce_fw = NULL;
2316 		release_firmware(rdev->mec_fw);
2317 		rdev->mec_fw = NULL;
2318 		release_firmware(rdev->mec2_fw);
2319 		rdev->mec2_fw = NULL;
2320 		release_firmware(rdev->rlc_fw);
2321 		rdev->rlc_fw = NULL;
2322 		release_firmware(rdev->sdma_fw);
2323 		rdev->sdma_fw = NULL;
2324 		release_firmware(rdev->mc_fw);
2325 		rdev->mc_fw = NULL;
2326 		release_firmware(rdev->smc_fw);
2327 		rdev->smc_fw = NULL;
2328 	}
2329 	return err;
2330 }
2331 
2332 /*
2333  * Core functions
2334  */
2335 /**
2336  * cik_tiling_mode_table_init - init the hw tiling table
2337  *
2338  * @rdev: radeon_device pointer
2339  *
2340  * Starting with SI, the tiling setup is done globally in a
2341  * set of 32 tiling modes.  Rather than selecting each set of
2342  * parameters per surface as on older asics, we just select
2343  * which index in the tiling table we want to use, and the
2344  * surface uses those parameters (CIK).
2345  */
cik_tiling_mode_table_init(struct radeon_device * rdev)2346 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2347 {
2348 	u32 *tile = rdev->config.cik.tile_mode_array;
2349 	u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2350 	const u32 num_tile_mode_states =
2351 			ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2352 	const u32 num_secondary_tile_mode_states =
2353 			ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2354 	u32 reg_offset, split_equal_to_row_size;
2355 	u32 num_pipe_configs;
2356 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2357 		rdev->config.cik.max_shader_engines;
2358 
2359 	switch (rdev->config.cik.mem_row_size_in_kb) {
2360 	case 1:
2361 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2362 		break;
2363 	case 2:
2364 	default:
2365 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2366 		break;
2367 	case 4:
2368 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2369 		break;
2370 	}
2371 
2372 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2373 	if (num_pipe_configs > 8)
2374 		num_pipe_configs = 16;
2375 
2376 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2377 		tile[reg_offset] = 0;
2378 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2379 		macrotile[reg_offset] = 0;
2380 
2381 	switch(num_pipe_configs) {
2382 	case 16:
2383 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2384 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2385 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2387 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2388 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2389 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2391 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2392 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2393 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2395 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2396 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2397 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2399 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2401 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402 			   TILE_SPLIT(split_equal_to_row_size));
2403 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2406 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2407 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2408 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2410 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2411 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2412 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413 			   TILE_SPLIT(split_equal_to_row_size));
2414 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2415 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2416 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2417 			   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2419 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2420 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2421 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2423 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2425 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2426 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2428 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2429 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2432 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2434 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2435 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2436 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2437 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2439 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2440 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2441 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2444 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2447 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2449 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2450 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2451 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2452 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2453 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2454 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2455 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2456 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2458 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2459 			    PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2460 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2461 
2462 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2464 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2465 			   NUM_BANKS(ADDR_SURF_16_BANK));
2466 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2468 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2469 			   NUM_BANKS(ADDR_SURF_16_BANK));
2470 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2472 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2473 			   NUM_BANKS(ADDR_SURF_16_BANK));
2474 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2477 			   NUM_BANKS(ADDR_SURF_16_BANK));
2478 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2481 			   NUM_BANKS(ADDR_SURF_8_BANK));
2482 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2484 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2485 			   NUM_BANKS(ADDR_SURF_4_BANK));
2486 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2488 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2489 			   NUM_BANKS(ADDR_SURF_2_BANK));
2490 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2492 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2493 			   NUM_BANKS(ADDR_SURF_16_BANK));
2494 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2496 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2497 			   NUM_BANKS(ADDR_SURF_16_BANK));
2498 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2501 			    NUM_BANKS(ADDR_SURF_16_BANK));
2502 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2504 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2505 			    NUM_BANKS(ADDR_SURF_8_BANK));
2506 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2508 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2509 			    NUM_BANKS(ADDR_SURF_4_BANK));
2510 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2513 			    NUM_BANKS(ADDR_SURF_2_BANK));
2514 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515 			    BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2516 			    MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2517 			    NUM_BANKS(ADDR_SURF_2_BANK));
2518 
2519 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2520 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2521 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2522 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2523 		break;
2524 
2525 	case 8:
2526 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2527 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2528 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2529 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2530 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2531 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2532 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2533 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2534 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2535 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2536 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2537 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2538 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2539 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2540 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2541 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2542 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2543 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2544 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2545 			   TILE_SPLIT(split_equal_to_row_size));
2546 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2549 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2550 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2551 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2553 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2554 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2555 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556 			   TILE_SPLIT(split_equal_to_row_size));
2557 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2558 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2559 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2560 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2561 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2562 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2564 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2566 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2567 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2568 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2569 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2571 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2572 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2573 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2574 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2575 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2577 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2578 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2579 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2581 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2582 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2583 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2584 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2585 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2586 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2587 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2589 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2590 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2592 		tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2593 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2594 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2596 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2597 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2598 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2599 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2600 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2601 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2602 			    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2604 
2605 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2606 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2607 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2608 				NUM_BANKS(ADDR_SURF_16_BANK));
2609 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2611 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2612 				NUM_BANKS(ADDR_SURF_16_BANK));
2613 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2614 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2615 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2616 				NUM_BANKS(ADDR_SURF_16_BANK));
2617 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2618 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2619 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2620 				NUM_BANKS(ADDR_SURF_16_BANK));
2621 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2622 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2623 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2624 				NUM_BANKS(ADDR_SURF_8_BANK));
2625 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2626 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2627 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2628 				NUM_BANKS(ADDR_SURF_4_BANK));
2629 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2630 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2631 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2632 				NUM_BANKS(ADDR_SURF_2_BANK));
2633 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2635 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2636 				NUM_BANKS(ADDR_SURF_16_BANK));
2637 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2639 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2640 				NUM_BANKS(ADDR_SURF_16_BANK));
2641 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2642 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2643 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2644 				NUM_BANKS(ADDR_SURF_16_BANK));
2645 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2646 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2647 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2648 				NUM_BANKS(ADDR_SURF_16_BANK));
2649 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2651 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2652 				NUM_BANKS(ADDR_SURF_8_BANK));
2653 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2654 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2655 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2656 				NUM_BANKS(ADDR_SURF_4_BANK));
2657 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2658 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2659 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2660 				NUM_BANKS(ADDR_SURF_2_BANK));
2661 
2662 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2663 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2664 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2665 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2666 		break;
2667 
2668 	case 4:
2669 		if (num_rbs == 4) {
2670 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2671 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2672 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2673 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2674 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2676 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2677 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2678 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2679 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2680 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2681 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2682 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2683 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2684 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2685 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2686 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2687 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2688 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2689 			   TILE_SPLIT(split_equal_to_row_size));
2690 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2691 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2694 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2695 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2697 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2698 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2699 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700 			   TILE_SPLIT(split_equal_to_row_size));
2701 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2702 			   PIPE_CONFIG(ADDR_SURF_P4_16x16));
2703 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2704 			   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2706 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2707 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2708 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2710 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2711 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2712 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2713 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2714 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2715 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2716 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2718 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2719 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2721 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2722 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2723 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2725 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2726 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2727 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2728 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2729 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2730 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2731 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2733 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2734 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2736 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2737 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2738 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2739 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2740 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2741 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2742 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2744 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2745 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2746 			    PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2747 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2748 
2749 		} else if (num_rbs < 4) {
2750 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2751 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2752 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2753 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2754 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2755 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2756 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2757 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2758 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2759 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2760 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2761 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2762 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2763 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2764 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2765 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2766 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2767 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2768 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2769 			   TILE_SPLIT(split_equal_to_row_size));
2770 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2771 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2773 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2774 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2775 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2777 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2778 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2779 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780 			   TILE_SPLIT(split_equal_to_row_size));
2781 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2782 			   PIPE_CONFIG(ADDR_SURF_P4_8x16));
2783 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2784 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2785 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2786 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2787 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2788 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2789 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2790 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2791 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2792 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2793 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2794 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2795 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2796 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2797 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2798 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2799 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2800 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2801 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2802 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2803 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2804 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2805 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2806 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2807 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2808 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2809 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2810 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2811 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2812 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2813 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2814 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2815 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2816 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2817 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2818 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2819 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2820 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2821 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2822 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2823 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2824 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2825 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2826 			    PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2827 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2828 		}
2829 
2830 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2831 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2832 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2833 				NUM_BANKS(ADDR_SURF_16_BANK));
2834 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2836 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2837 				NUM_BANKS(ADDR_SURF_16_BANK));
2838 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2840 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2841 				NUM_BANKS(ADDR_SURF_16_BANK));
2842 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2844 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2845 				NUM_BANKS(ADDR_SURF_16_BANK));
2846 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2847 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2848 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2849 				NUM_BANKS(ADDR_SURF_16_BANK));
2850 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2851 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2852 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2853 				NUM_BANKS(ADDR_SURF_8_BANK));
2854 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2855 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2856 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2857 				NUM_BANKS(ADDR_SURF_4_BANK));
2858 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2859 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2860 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2861 				NUM_BANKS(ADDR_SURF_16_BANK));
2862 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2863 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2864 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2865 				NUM_BANKS(ADDR_SURF_16_BANK));
2866 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2867 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2868 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2869 				NUM_BANKS(ADDR_SURF_16_BANK));
2870 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2872 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2873 				NUM_BANKS(ADDR_SURF_16_BANK));
2874 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2875 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2876 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2877 				NUM_BANKS(ADDR_SURF_16_BANK));
2878 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2879 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2880 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2881 				NUM_BANKS(ADDR_SURF_8_BANK));
2882 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2883 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2884 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2885 				NUM_BANKS(ADDR_SURF_4_BANK));
2886 
2887 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2888 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2889 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2890 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2891 		break;
2892 
2893 	case 2:
2894 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2895 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2896 			   PIPE_CONFIG(ADDR_SURF_P2) |
2897 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2898 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2899 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2900 			   PIPE_CONFIG(ADDR_SURF_P2) |
2901 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2902 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2903 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2904 			   PIPE_CONFIG(ADDR_SURF_P2) |
2905 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2906 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2907 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2908 			   PIPE_CONFIG(ADDR_SURF_P2) |
2909 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2910 		tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2912 			   PIPE_CONFIG(ADDR_SURF_P2) |
2913 			   TILE_SPLIT(split_equal_to_row_size));
2914 		tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2915 			   PIPE_CONFIG(ADDR_SURF_P2) |
2916 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2917 		tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2918 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2919 			   PIPE_CONFIG(ADDR_SURF_P2) |
2920 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2921 		tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2922 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2923 			   PIPE_CONFIG(ADDR_SURF_P2) |
2924 			   TILE_SPLIT(split_equal_to_row_size));
2925 		tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2926 			   PIPE_CONFIG(ADDR_SURF_P2);
2927 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2928 			   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2929 			   PIPE_CONFIG(ADDR_SURF_P2));
2930 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2931 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2932 			    PIPE_CONFIG(ADDR_SURF_P2) |
2933 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2934 		tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2935 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2936 			    PIPE_CONFIG(ADDR_SURF_P2) |
2937 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938 		tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2939 			    MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2940 			    PIPE_CONFIG(ADDR_SURF_P2) |
2941 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2943 			    PIPE_CONFIG(ADDR_SURF_P2) |
2944 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2945 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2946 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2947 			    PIPE_CONFIG(ADDR_SURF_P2) |
2948 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949 		tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2951 			    PIPE_CONFIG(ADDR_SURF_P2) |
2952 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2953 		tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2954 			    MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2955 			    PIPE_CONFIG(ADDR_SURF_P2) |
2956 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2957 		tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2958 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2959 			    PIPE_CONFIG(ADDR_SURF_P2));
2960 		tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2961 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2962 			    PIPE_CONFIG(ADDR_SURF_P2) |
2963 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2964 		tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2965 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2966 			    PIPE_CONFIG(ADDR_SURF_P2) |
2967 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2968 		tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2969 			    MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2970 			    PIPE_CONFIG(ADDR_SURF_P2) |
2971 			    SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2972 
2973 		macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2974 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2975 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2976 				NUM_BANKS(ADDR_SURF_16_BANK));
2977 		macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2978 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2979 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2980 				NUM_BANKS(ADDR_SURF_16_BANK));
2981 		macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2982 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2983 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2984 				NUM_BANKS(ADDR_SURF_16_BANK));
2985 		macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2986 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2987 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2988 				NUM_BANKS(ADDR_SURF_16_BANK));
2989 		macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2990 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2991 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2992 				NUM_BANKS(ADDR_SURF_16_BANK));
2993 		macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2994 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2995 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2996 				NUM_BANKS(ADDR_SURF_16_BANK));
2997 		macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2998 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2999 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3000 				NUM_BANKS(ADDR_SURF_8_BANK));
3001 		macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3002 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3003 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3004 				NUM_BANKS(ADDR_SURF_16_BANK));
3005 		macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3006 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3007 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3008 				NUM_BANKS(ADDR_SURF_16_BANK));
3009 		macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3010 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3011 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3012 				NUM_BANKS(ADDR_SURF_16_BANK));
3013 		macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3014 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3015 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3016 				NUM_BANKS(ADDR_SURF_16_BANK));
3017 		macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3018 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3019 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3020 				NUM_BANKS(ADDR_SURF_16_BANK));
3021 		macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3023 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3024 				NUM_BANKS(ADDR_SURF_16_BANK));
3025 		macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3027 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3028 				NUM_BANKS(ADDR_SURF_8_BANK));
3029 
3030 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3031 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3032 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3033 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3034 		break;
3035 
3036 	default:
3037 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3038 	}
3039 }
3040 
3041 /**
3042  * cik_select_se_sh - select which SE, SH to address
3043  *
3044  * @rdev: radeon_device pointer
3045  * @se_num: shader engine to address
3046  * @sh_num: sh block to address
3047  *
3048  * Select which SE, SH combinations to address. Certain
3049  * registers are instanced per SE or SH.  0xffffffff means
3050  * broadcast to all SEs or SHs (CIK).
3051  */
static void cik_select_se_sh(struct radeon_device *rdev,
			     u32 se_num, u32 sh_num)
{
	/* Target which SE/SH instance subsequent per-instance register
	 * accesses hit; 0xffffffff selects broadcast to all instances. */
	u32 gfx_index = INSTANCE_BROADCAST_WRITES;

	if (se_num == 0xffffffff)
		gfx_index |= SE_BROADCAST_WRITES;
	else
		gfx_index |= SE_INDEX(se_num);

	if (sh_num == 0xffffffff)
		gfx_index |= SH_BROADCAST_WRITES;
	else
		gfx_index |= SH_INDEX(sh_num);

	WREG32(GRBM_GFX_INDEX, gfx_index);
}
3067 
3068 /**
3069  * cik_create_bitmask - create a bitmask
3070  *
3071  * @bit_width: length of the mask
3072  *
3073  * create a variable length bit mask (CIK).
3074  * Returns the bitmask.
3075  */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* Build a mask with the low bit_width bits set by shifting in
	 * one bit per iteration (bit_width == 0 yields 0). */
	u32 mask = 0;
	u32 remaining;

	for (remaining = bit_width; remaining != 0; remaining--)
		mask = (mask << 1) | 1;

	return mask;
}
3086 
/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num_per_se,
			      u32 sh_per_se)
{
	u32 disabled, valid_mask;

	/* Hardware-fused disabled RBs; only meaningful when bit 0 is set. */
	disabled = RREG32(CC_RB_BACKEND_DISABLE);
	if (disabled & 1)
		disabled &= BACKEND_DISABLE_MASK;
	else
		disabled = 0;

	/* Merge in any driver/user requested RB disables. */
	disabled |= RREG32(GC_USER_RB_BACKEND_DISABLE);
	disabled >>= BACKEND_DISABLE_SHIFT;

	/* Restrict the result to the RBs that exist per SH. */
	valid_mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);

	return disabled & valid_mask;
}
3117 
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Collect the disabled-RB bits of every SE/SH into one packed
	 * bitmap; Hawaii packs a wider per-SH bitfield than other CIK
	 * parts. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			if (rdev->family == CHIP_HAWAII)
				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
			else
				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* return to broadcast mode so later writes reach all instances */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert the bitmap: a set bit now means the RB is usable. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* remember the enabled-RB mask in the per-asic config */
	rdev->config.cik.backend_enable_mask = enabled_rbs;

	/* Program PA_SC_RASTER_CONFIG for each SE, consuming two
	 * enable bits (one RB pair) per SH. */
	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* neither RB of the pair is enabled */
				if (j == 0)
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
				else
					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
				break;
			case 1:
				/* only the first RB of the pair */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB of the pair */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs of the pair are enabled */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* leave the GRBM index in broadcast mode */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3186 
3187 /**
3188  * cik_gpu_init - setup the 3D engine
3189  *
3190  * @rdev: radeon_device pointer
3191  *
3192  * Configures the 3D engine and tiling configuration
3193  * registers so that the 3D engine is usable.
3194  */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap __unused, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-family shader core limits, scan-converter FIFO sizes and
	 * the golden GB_ADDR_CONFIG value. */
	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAWAII:
		rdev->config.cik.max_shader_engines = 4;
		rdev->config.cik.max_tile_pipes = 16;
		rdev->config.cik.max_cu_per_sh = 11;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 4;
		rdev->config.cik.max_texture_channel_caches = 16;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 8;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	/* GRBM read timeout and SRBM interrupt setup (ack any pending) */
	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 0x1);
	WREG32(SRBM_INT_ACK, 0x1);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	/* derive memory geometry used by the tiling setup below */
	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	rdev->config.cik.tile_config |=
		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* propagate the address config to every block that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	/* configure render backends across SEs/SHs */
	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* count the active CUs across all SE/SH combinations */
	rdev->config.cik.active_cus = 0;
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			rdev->config.cik.active_cus +=
				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	tmp |= 0x03000000;
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	/* read-modify-write of the DB debug registers; the magic masks
	 * and values come from AMD's recommended defaults */
	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	tmp |= 0x00000400;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	tmp |= 0x00020200;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	tmp |= 0x00018208;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* program the scan-converter FIFO sizes chosen per family above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* write back HDP_HOST_PATH_CNTL unchanged (read-to-latch) */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	/* let the configuration settle before continuing */
	udelay(50);
}
3435 
3436 /*
3437  * GPU scratch registers helpers function.
3438  */
3439 /**
3440  * cik_scratch_init - setup driver info for CP scratch regs
3441  *
3442  * @rdev: radeon_device pointer
3443  *
3444  * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
3446  * is not used by default on newer asics (r6xx+).  On newer asics,
3447  * memory buffers are used for fences rather than scratch regs.
3448  */
cik_scratch_init(struct radeon_device * rdev)3449 static void cik_scratch_init(struct radeon_device *rdev)
3450 {
3451 	int i;
3452 
3453 	rdev->scratch.num_reg = 7;
3454 	rdev->scratch.reg_base = SCRATCH_REG0;
3455 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3456 		rdev->scratch.free[i] = true;
3457 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3458 	}
3459 }
3460 
3461 /**
3462  * cik_ring_test - basic gfx ring test
3463  *
3464  * @rdev: radeon_device pointer
3465  * @ring: radeon_ring structure holding ring information
3466  *
3467  * Allocate a scratch register and write to it using the gfx ring (CIK).
3468  * Provides a basic gfx ring test to verify that the ring is working.
3469  * Used by cik_cp_gfx_resume();
3470  * Returns 0 on success, error on failure.
3471  */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* Grab a scratch register and seed it with a sentinel value. */
	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	/* 3 dwords: SET_UCONFIG_REG header, register offset, value */
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* Ask the CP to overwrite the sentinel with 0xDEADBEEF. */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* Poll until the CP has performed the write or we time out. */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3512 
3513 /**
3514  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3515  *
3516  * @rdev: radeon_device pointer
3517  * @ridx: radeon ring index
3518  *
3519  * Emits an hdp flush on the cp.
3520  */
cik_hdp_flush_cp_ring_emit(struct radeon_device * rdev,int ridx)3521 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3522 				       int ridx)
3523 {
3524 	struct radeon_ring *ring = &rdev->ring[ridx];
3525 	u32 ref_and_mask;
3526 
3527 	switch (ring->idx) {
3528 	case CAYMAN_RING_TYPE_CP1_INDEX:
3529 	case CAYMAN_RING_TYPE_CP2_INDEX:
3530 	default:
3531 		switch (ring->me) {
3532 		case 0:
3533 			ref_and_mask = CP2 << ring->pipe;
3534 			break;
3535 		case 1:
3536 			ref_and_mask = CP6 << ring->pipe;
3537 			break;
3538 		default:
3539 			return;
3540 		}
3541 		break;
3542 	case RADEON_RING_TYPE_GFX_INDEX:
3543 		ref_and_mask = CP0;
3544 		break;
3545 	}
3546 
3547 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3548 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3549 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3550 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3551 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3552 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3553 	radeon_ring_write(ring, ref_and_mask);
3554 	radeon_ring_write(ring, ref_and_mask);
3555 	radeon_ring_write(ring, 0x20); /* poll interval */
3556 }
3557 
3558 /**
3559  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3560  *
3561  * @rdev: radeon_device pointer
3562  * @fence: radeon fence object
3563  *
 * Emits a fence sequence number on the gfx ring and flushes
3565  * GPU caches.
3566  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where the fence sequence number is written */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);	/* dword-aligned low bits */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));	/* INT_SEL(0): no interrupt for the dummy */
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* INT_SEL(2): this one raises the fence interrupt */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3598 
3599 /**
3600  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3601  *
3602  * @rdev: radeon_device pointer
3603  * @fence: radeon fence object
3604  *
 * Emits a fence sequence number on the compute ring and flushes
3606  * GPU caches.
3607  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address where the fence sequence number is written */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1)/INT_SEL(2) match the gfx fence path: write seq, raise irq */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);	/* dword-aligned low bits */
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3626 
3627 /**
3628  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3629  *
3630  * @rdev: radeon_device pointer
3631  * @ring: radeon ring buffer object
3632  * @semaphore: radeon semaphore object
 * @emit_wait: Is this a semaphore wait?
3634  *
3635  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3636  * from running ahead of semaphore waits.
3637  */
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t gpu_addr = semaphore->gpu_addr;
	unsigned sel;

	if (emit_wait)
		sel = PACKET3_SEM_SEL_WAIT;
	else
		sel = PACKET3_SEM_SEL_SIGNAL;

	/* MEM_SEMAPHORE: 64-bit semaphore address plus signal/wait select */
	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, lower_32_bits(gpu_addr));
	radeon_ring_write(ring, (upper_32_bits(gpu_addr) & 0xffff) | sel);

	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
		/* Prevent the PFP from running ahead of the semaphore wait */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}

	return true;
}
3658 
3659 /**
3660  * cik_copy_cpdma - copy pages using the CP DMA engine
3661  *
3662  * @rdev: radeon_device pointer
3663  * @src_offset: src GPU address
3664  * @dst_offset: dst GPU address
3665  * @num_gpu_pages: number of GPU pages to xfer
3666  * @resv: reservation object to sync to
3667  *
3668  * Copy GPU paging using the CP DMA engine (CIK+).
3669  * Used by the radeon ttm implementation to move pages if
3670  * registered as the asic copy callback.
3671  */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct dma_resv *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	/* Split the copy into chunks of at most 0x1fffff bytes
	 * (apparently the per-DMA_DATA-packet limit). */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per DMA_DATA packet, plus 18 for sync/fence overhead
	 * (presumably; see the emission below and radeon_fence_emit) */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* Wait for any fences in the reservation object before copying. */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* only the last chunk synchronizes with the CP */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		/* roll the ring back; nothing was committed */
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
3730 
3731 /*
3732  * IB stuff
3733  */
3734 /**
3735  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3736  *
3737  * @rdev: radeon_device pointer
3738  * @ib: radeon indirect buffer object
3739  *
3740  * Emits a DE (drawing engine) or CE (constant engine) IB
3741  * on the gfx ring.  IBs are usually generated by userspace
3742  * acceleration drivers and submitted to the kernel for
3743  * scheduling on the ring.  This function schedules the IB
3744  * on the gfx ring for execution by the GPU.
3745  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	/* vmid 0 when the IB has no VM attached */
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write + 4 for the IB packet below */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for this write + 4 for the IB packet below */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB size in dwords plus the VM id in the top byte */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3785 
3786 /**
3787  * cik_ib_test - basic gfx ring IB test
3788  *
3789  * @rdev: radeon_device pointer
3790  * @ring: radeon_ring structure holding ring information
3791  *
3792  * Allocate an IB and execute it on the gfx ring (CIK).
3793  * Provides a basic gfx ring test to verify that IBs are working.
3794  * Returns 0 on success, error on failure.
3795  */
cik_ib_test(struct radeon_device * rdev,struct radeon_ring * ring)3796 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3797 {
3798 	struct radeon_ib ib;
3799 	uint32_t scratch;
3800 	uint32_t tmp = 0;
3801 	unsigned i;
3802 	int r;
3803 
3804 	r = radeon_scratch_get(rdev, &scratch);
3805 	if (r) {
3806 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3807 		return r;
3808 	}
3809 	WREG32(scratch, 0xCAFEDEAD);
3810 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3811 	if (r) {
3812 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3813 		radeon_scratch_free(rdev, scratch);
3814 		return r;
3815 	}
3816 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3817 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3818 	ib.ptr[2] = 0xDEADBEEF;
3819 	ib.length_dw = 3;
3820 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
3821 	if (r) {
3822 		radeon_scratch_free(rdev, scratch);
3823 		radeon_ib_free(rdev, &ib);
3824 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3825 		return r;
3826 	}
3827 	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3828 		RADEON_USEC_IB_TEST_TIMEOUT));
3829 	if (r < 0) {
3830 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3831 		radeon_scratch_free(rdev, scratch);
3832 		radeon_ib_free(rdev, &ib);
3833 		return r;
3834 	} else if (r == 0) {
3835 		DRM_ERROR("radeon: fence wait timed out.\n");
3836 		radeon_scratch_free(rdev, scratch);
3837 		radeon_ib_free(rdev, &ib);
3838 		return -ETIMEDOUT;
3839 	}
3840 	r = 0;
3841 	for (i = 0; i < rdev->usec_timeout; i++) {
3842 		tmp = RREG32(scratch);
3843 		if (tmp == 0xDEADBEEF)
3844 			break;
3845 		udelay(1);
3846 	}
3847 	if (i < rdev->usec_timeout) {
3848 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3849 	} else {
3850 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3851 			  scratch, tmp);
3852 		r = -EINVAL;
3853 	}
3854 	radeon_scratch_free(rdev, scratch);
3855 	radeon_ib_free(rdev, &ib);
3856 	return r;
3857 }
3858 
3859 /*
3860  * CP.
 * On CIK, gfx and compute now have independent command processors.
3862  *
3863  * GFX
3864  * Gfx consists of a single ring and can process both gfx jobs and
3865  * compute jobs.  The gfx CP consists of three microengines (ME):
3866  * PFP - Pre-Fetch Parser
3867  * ME - Micro Engine
3868  * CE - Constant Engine
3869  * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
3871  * used by the DE so that they can be loaded into cache in parallel
3872  * while the DE is processing state update packets.
3873  *
3874  * Compute
3875  * The compute CP consists of two microengines (ME):
3876  * MEC1 - Compute MicroEngine 1
3877  * MEC2 - Compute MicroEngine 2
3878  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3879  * The queues are exposed to userspace and are programmed directly
3880  * by the compute runtime.
3881  */
3882 /**
3883  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3884  *
3885  * @rdev: radeon_device pointer
3886  * @enable: enable or disable the MEs
3887  *
3888  * Halts or unhalts the gfx MEs.
3889  */
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
{
	if (!enable) {
		/* restore the visible VRAM size before the gfx copy ring stops */
		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	} else {
		/* clear all halt bits to unhalt PFP/ME/CE */
		WREG32(CP_ME_CNTL, 0);
	}
	udelay(50);
}
3902 
3903 /**
3904  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3905  *
3906  * @rdev: radeon_device pointer
3907  *
3908  * Loads the gfx PFP, ME, and CE ucode.
3909  * Returns 0 for success, -EINVAL if the ucode is not available.
3910  */
cik_cp_gfx_load_microcode(struct radeon_device * rdev)3911 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3912 {
3913 	int i;
3914 
3915 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3916 		return -EINVAL;
3917 
3918 	cik_cp_gfx_enable(rdev, false);
3919 
3920 	if (rdev->new_fw) {
3921 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3922 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3923 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3924 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3925 		const struct gfx_firmware_header_v1_0 *me_hdr =
3926 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3927 		const __le32 *fw_data;
3928 		u32 fw_size;
3929 
3930 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3931 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3932 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3933 
3934 		/* PFP */
3935 		fw_data = (const __le32 *)
3936 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3937 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3938 		WREG32(CP_PFP_UCODE_ADDR, 0);
3939 		for (i = 0; i < fw_size; i++)
3940 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3941 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3942 
3943 		/* CE */
3944 		fw_data = (const __le32 *)
3945 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3946 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3947 		WREG32(CP_CE_UCODE_ADDR, 0);
3948 		for (i = 0; i < fw_size; i++)
3949 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3950 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3951 
3952 		/* ME */
3953 		fw_data = (const __be32 *)
3954 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3955 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3956 		WREG32(CP_ME_RAM_WADDR, 0);
3957 		for (i = 0; i < fw_size; i++)
3958 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3959 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3960 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3961 	} else {
3962 		const __be32 *fw_data;
3963 
3964 		/* PFP */
3965 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3966 		WREG32(CP_PFP_UCODE_ADDR, 0);
3967 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3968 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3969 		WREG32(CP_PFP_UCODE_ADDR, 0);
3970 
3971 		/* CE */
3972 		fw_data = (const __be32 *)rdev->ce_fw->data;
3973 		WREG32(CP_CE_UCODE_ADDR, 0);
3974 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3975 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3976 		WREG32(CP_CE_UCODE_ADDR, 0);
3977 
3978 		/* ME */
3979 		fw_data = (const __be32 *)rdev->me_fw->data;
3980 		WREG32(CP_ME_RAM_WADDR, 0);
3981 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3982 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3983 		WREG32(CP_ME_RAM_WADDR, 0);
3984 	}
3985 
3986 	return 0;
3987 }
3988 
3989 /**
3990  * cik_cp_gfx_start - start the gfx ring
3991  *
3992  * @rdev: radeon_device pointer
3993  *
3994  * Enables the ring and loads the clear state context and other
3995  * packets required to init the ring.
3996  * Returns 0 for success, error for failure.
3997  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* cik_default_size dwords of clear state + 17 dwords of setup packets */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the canned default state between the preamble markers */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4049 
4050 /**
4051  * cik_cp_gfx_fini - stop the gfx ring
4052  *
4053  * @rdev: radeon_device pointer
4054  *
4055  * Stop the gfx ring and tear down the driver ring
4056  * info.
4057  */
cik_cp_gfx_fini(struct radeon_device * rdev)4058 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4059 {
4060 	cik_cp_gfx_enable(rdev, false);
4061 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4062 }
4063 
4064 /**
4065  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4066  *
4067  * @rdev: radeon_device pointer
4068  *
4069  * Program the location and size of the gfx ring buffer
4070  * and test it to make sure it's working.
4071  * Returns 0 for success, error for failure.
4072  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	/* settle before dropping RB_RPTR_WR_ENA again */
	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* ring base is a 256-byte aligned GPU address, programmed >> 8 */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx ring is up; expose the full VRAM size for ttm copies */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4139 
cik_gfx_get_rptr(struct radeon_device * rdev,struct radeon_ring * ring)4140 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4141 		     struct radeon_ring *ring)
4142 {
4143 	u32 rptr;
4144 
4145 	if (rdev->wb.enabled)
4146 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4147 	else
4148 		rptr = RREG32(CP_RB0_RPTR);
4149 
4150 	return rptr;
4151 }
4152 
cik_gfx_get_wptr(struct radeon_device * rdev,struct radeon_ring * ring)4153 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4154 		     struct radeon_ring *ring)
4155 {
4156 	return RREG32(CP_RB0_WPTR);
4157 }
4158 
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back, presumably to flush the posted MMIO write — the
	 * usual idiom; confirm against the hw docs */
	(void)RREG32(CP_RB0_WPTR);
}
4165 
cik_compute_get_rptr(struct radeon_device * rdev,struct radeon_ring * ring)4166 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4167 			 struct radeon_ring *ring)
4168 {
4169 	u32 rptr;
4170 
4171 	if (rdev->wb.enabled) {
4172 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4173 	} else {
4174 		mutex_lock(&rdev->srbm_mutex);
4175 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4176 		rptr = RREG32(CP_HQD_PQ_RPTR);
4177 		cik_srbm_select(rdev, 0, 0, 0, 0);
4178 		mutex_unlock(&rdev->srbm_mutex);
4179 	}
4180 
4181 	return rptr;
4182 }
4183 
cik_compute_get_wptr(struct radeon_device * rdev,struct radeon_ring * ring)4184 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4185 			 struct radeon_ring *ring)
4186 {
4187 	u32 wptr;
4188 
4189 	if (rdev->wb.enabled) {
4190 		/* XXX check if swapping is necessary on BE */
4191 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4192 	} else {
4193 		mutex_lock(&rdev->srbm_mutex);
4194 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4195 		wptr = RREG32(CP_HQD_PQ_WPTR);
4196 		cik_srbm_select(rdev, 0, 0, 0, 0);
4197 		mutex_unlock(&rdev->srbm_mutex);
4198 	}
4199 
4200 	return wptr;
4201 }
4202 
cik_compute_set_wptr(struct radeon_device * rdev,struct radeon_ring * ring)4203 void cik_compute_set_wptr(struct radeon_device *rdev,
4204 			  struct radeon_ring *ring)
4205 {
4206 	/* XXX check if swapping is necessary on BE */
4207 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4208 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4209 }
4210 
/* Quiesce one compute queue's HQD.  Caller must hold rdev->srbm_mutex
 * (see cik_cp_compute_enable). */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	/* point SRBM-indexed registers at this ring's me/pipe/queue */
	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		/* wait (bounded) for the queue to drain and go inactive */
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	/* restore the default SRBM selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4235 
4236 /**
4237  * cik_cp_compute_enable - enable/disable the compute CP MEs
4238  *
4239  * @rdev: radeon_device pointer
4240  * @enable: enable or disable the MEs
4241  *
4242  * Halts or unhalts the compute MEs.
4243  */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (!enable) {
		/*
		 * To make hibernation reliable we need to clear compute ring
		 * configuration before halting the compute ring.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
		mutex_unlock(&rdev->srbm_mutex);

		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	} else {
		/* clear both halt bits to unhalt MEC1/MEC2 */
		WREG32(CP_MEC_CNTL, 0);
	}
	udelay(50);
}
4264 
4265 /**
4266  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4267  *
4268  * @rdev: radeon_device pointer
4269  *
4270  * Loads the compute MEC1&2 ucode.
4271  * Returns 0 for success, -EINVAL if the ucode is not available.
4272  */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	/* halt the MECs before rewriting their ucode stores */
	cik_cp_compute_enable(rdev, false);

	if (rdev->new_fw) {
		const struct gfx_firmware_header_v1_0 *mec_hdr =
			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
		const __le32 *fw_data;
		u32 fw_size;

		radeon_ucode_print_gfx_hdr(&mec_hdr->header);

		/* MEC1 */
		fw_data = (const __le32 *)
			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));

		/* MEC2 */
		if (rdev->family == CHIP_KAVERI) {
			/* NOTE(review): rdev->mec2_fw is dereferenced without
			 * a NULL check here — presumably guaranteed by the
			 * firmware init path for KAVERI; confirm. */
			const struct gfx_firmware_header_v1_0 *mec2_hdr =
				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;

			fw_data = (const __le32 *)
				(rdev->mec2_fw->data +
				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < fw_size; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
		}
	} else {
		const __be32 *fw_data;

		/* MEC1 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

		if (rdev->family == CHIP_KAVERI) {
			/* MEC2 */
			/* NOTE(review): old-style firmware loads mec_fw (not
			 * mec2_fw) into MEC2 — looks intentional for the
			 * legacy single-image scheme; confirm. */
			fw_data = (const __be32 *)rdev->mec_fw->data;
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		}
	}

	return 0;
}
4335 
4336 /**
4337  * cik_cp_compute_start - start the compute queues
4338  *
4339  * @rdev: radeon_device pointer
4340  *
4341  * Enable the compute queues.
4342  * Returns 0 for success, error for failure.
4343  */
cik_cp_compute_start(struct radeon_device * rdev)4344 static int cik_cp_compute_start(struct radeon_device *rdev)
4345 {
4346 	cik_cp_compute_enable(rdev, true);
4347 
4348 	return 0;
4349 }
4350 
4351 /**
4352  * cik_cp_compute_fini - stop the compute queues
4353  *
4354  * @rdev: radeon_device pointer
4355  *
4356  * Stop the compute queues and tear down the driver queue
4357  * info.
4358  */
cik_cp_compute_fini(struct radeon_device * rdev)4359 static void cik_cp_compute_fini(struct radeon_device *rdev)
4360 {
4361 	int i, idx, r;
4362 
4363 	cik_cp_compute_enable(rdev, false);
4364 
4365 	for (i = 0; i < 2; i++) {
4366 		if (i == 0)
4367 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4368 		else
4369 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4370 
4371 		if (rdev->ring[idx].mqd_obj) {
4372 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4373 			if (unlikely(r != 0))
4374 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4375 
4376 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4377 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4378 
4379 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4380 			rdev->ring[idx].mqd_obj = NULL;
4381 		}
4382 	}
4383 }
4384 
cik_mec_fini(struct radeon_device * rdev)4385 static void cik_mec_fini(struct radeon_device *rdev)
4386 {
4387 	int r;
4388 
4389 	if (rdev->mec.hpd_eop_obj) {
4390 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4391 		if (unlikely(r != 0))
4392 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4393 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4394 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4395 
4396 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4397 		rdev->mec.hpd_eop_obj = NULL;
4398 	}
4399 }
4400 
4401 #define MEC_HPD_SIZE 2048
4402 
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	/* allocate the HPD EOP buffer once: MEC_HPD_SIZE * 2 per pipe
	 * (note: error messages below say "HDP EOP" — apparent typo for HPD) */
	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4458 
/*
 * In-memory shadow of the per-queue CP_HQD_x and CP_MQD_x registers,
 * embedded in struct bonaire_mqd below.  The fields mirror the values
 * cik_cp_compute_resume() programs into the corresponding registers.
 * The layout is presumably consumed by the CP firmware when reloading
 * a queue's state from memory, so do NOT reorder or resize fields
 * (TODO confirm against the CIK MQD specification).
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4497 
/*
 * Memory Queue Descriptor (MQD) for Bonaire-class (CIK) compute queues.
 * One of these lives in each compute ring's mqd_obj BO; its GPU address
 * is handed to the hardware via CP_MQD_BASE_ADDR in
 * cik_cp_compute_resume().  The layout is hardware/firmware ABI, so do
 * NOT reorder or resize fields.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;	/* HQD register shadow, see above */
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4525 
4526 /**
4527  * cik_cp_compute_resume - setup the compute queue registers
4528  *
4529  * @rdev: radeon_device pointer
4530  *
4531  * Program the compute queues and test them to make sure they
4532  * are working.
4533  * Returns 0 for success, error for failure.
4534  */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	/*
	 * Program the per-pipe EOP buffer for every pipe of every MEC.
	 * Pipes 0-3 belong to ME 1, pipes 4-7 (KV only) to ME 2; the
	 * per-pipe stride matches the allocation in cik_mec_init().
	 */
	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		cik_srbm_select(rdev, me, pipe, 0, 0);

		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
		/* write the EOP addr */
		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

		/* set the VMID assigned */
		WREG32(CP_HPD_EOP_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(CP_HPD_EOP_CONTROL);
		tmp &= ~EOP_SIZE_MASK;
		tmp |= order_base_2(MEC_HPD_SIZE / 8);
		WREG32(CP_HPD_EOP_CONTROL, tmp);

	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* allocate the MQD BO on first resume; reused afterwards */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		/* enable all CUs for this queue's static thread management */
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* program the HQD registers for this ring's me/pipe/queue;
		 * each value is mirrored into the MQD's queue_state shadow */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			/* wait (bounded) for the dequeue to complete */
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		/* queue size and rptr block size are log2 encoded */
		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* the ring is only marked ready if its test passes */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
4771 
/* Enable/disable the gfx and compute command processors together. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4777 
/*
 * Load the gfx CP microcode first, then the compute CP microcode;
 * stop at the first failure.  Returns 0 on success, error otherwise.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r == 0)
		r = cik_cp_compute_load_microcode(rdev);
	return r;
}
4791 
/* Tear down both command processors (gfx first, then compute). */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4797 
cik_cp_resume(struct radeon_device * rdev)4798 static int cik_cp_resume(struct radeon_device *rdev)
4799 {
4800 	int r;
4801 
4802 	cik_enable_gui_idle_interrupt(rdev, false);
4803 
4804 	r = cik_cp_load_microcode(rdev);
4805 	if (r)
4806 		return r;
4807 
4808 	r = cik_cp_gfx_resume(rdev);
4809 	if (r)
4810 		return r;
4811 	r = cik_cp_compute_resume(rdev);
4812 	if (r)
4813 		return r;
4814 
4815 	cik_enable_gui_idle_interrupt(rdev, true);
4816 
4817 	return 0;
4818 }
4819 
/*
 * Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log; used
 * by the soft-reset path to record the GPU state before and after a
 * reset attempt.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4859 
4860 /**
4861  * cik_gpu_check_soft_reset - check which blocks are busy
4862  *
4863  * @rdev: radeon_device pointer
4864  *
4865  * Check which blocks are busy and return the relevant reset
4866  * mask to be used by cik_gpu_soft_reset().
4867  * Returns a mask of the blocks to be reset.
4868  */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
4940 
4941 /**
4942  * cik_gpu_soft_reset - soft reset GPU
4943  *
4944  * @rdev: radeon_device pointer
4945  * @reset_mask: mask of which blocks to reset
4946  *
4947  * Soft reset the blocks specified in @reset_mask.
4948  */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* record register state before the reset for diagnostics */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* quiesce memory traffic before touching the reset registers */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC reset only on discrete parts; IGPs share the memory controller */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* assert, settle, then deassert; the extra RREG32s post the writes */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5071 
/* GMCON register state saved across a PCI config reset on KV/KB IGPs. */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5077 
/*
 * Save the GMCON registers into @save and disable the render engine
 * auto-execute / stutter features before a PCI config reset; the
 * counterpart kv_restore_regs_for_reset() puts the state back.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5089 
kv_restore_regs_for_reset(struct radeon_device * rdev,struct kv_reset_save_regs * save)5090 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5091 				      struct kv_reset_save_regs *save)
5092 {
5093 	int i;
5094 
5095 	WREG32(GMCON_PGFSM_WRITE, 0);
5096 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5097 
5098 	for (i = 0; i < 5; i++)
5099 		WREG32(GMCON_PGFSM_WRITE, 0);
5100 
5101 	WREG32(GMCON_PGFSM_WRITE, 0);
5102 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5103 
5104 	for (i = 0; i < 5; i++)
5105 		WREG32(GMCON_PGFSM_WRITE, 0);
5106 
5107 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5108 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5109 
5110 	for (i = 0; i < 5; i++)
5111 		WREG32(GMCON_PGFSM_WRITE, 0);
5112 
5113 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5114 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5115 
5116 	for (i = 0; i < 5; i++)
5117 		WREG32(GMCON_PGFSM_WRITE, 0);
5118 
5119 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5120 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5121 
5122 	for (i = 0; i < 5; i++)
5123 		WREG32(GMCON_PGFSM_WRITE, 0);
5124 
5125 	WREG32(GMCON_PGFSM_WRITE, 0);
5126 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5127 
5128 	for (i = 0; i < 5; i++)
5129 		WREG32(GMCON_PGFSM_WRITE, 0);
5130 
5131 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5132 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5133 
5134 	for (i = 0; i < 5; i++)
5135 		WREG32(GMCON_PGFSM_WRITE, 0);
5136 
5137 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5138 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5139 
5140 	for (i = 0; i < 5; i++)
5141 		WREG32(GMCON_PGFSM_WRITE, 0);
5142 
5143 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5144 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5145 
5146 	for (i = 0; i < 5; i++)
5147 		WREG32(GMCON_PGFSM_WRITE, 0);
5148 
5149 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5150 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5151 
5152 	for (i = 0; i < 5; i++)
5153 		WREG32(GMCON_PGFSM_WRITE, 0);
5154 
5155 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5156 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5157 
5158 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5159 	WREG32(GMCON_MISC, save->gmcon_misc);
5160 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5161 }
5162 
/*
 * Hard-reset the GPU through PCI config space: quiesce every engine
 * and the memory controller, then trigger the reset and wait for the
 * ASIC to come back (CONFIG_MEMSIZE reads 0xffffffff while in reset).
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGPs lose GMCON state across the reset; save it for restore below */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5225 
5226 /**
5227  * cik_asic_reset - soft reset GPU
5228  *
5229  * @rdev: radeon_device pointer
5230  * @hard: force hard reset
5231  *
5232  * Look up which blocks are hung and attempt
5233  * to reset them.
5234  * Returns 0 for success.
5235  */
int cik_asic_reset(struct radeon_device *rdev, bool hard)
{
	u32 reset_mask;

	/* a hard reset bypasses the soft-reset escalation entirely */
	if (hard) {
		cik_gpu_pci_config_reset(rdev);
		return 0;
	}

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* let the BIOS know the engine is hung until we recover it */
	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* clear the hung flag only if every block is now idle */
	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
5266 
5267 /**
5268  * cik_gfx_is_lockup - check if the 3D engine is locked up
5269  *
5270  * @rdev: radeon_device pointer
5271  * @ring: radeon_ring structure holding ring information
5272  *
5273  * Check if the 3D engine is locked up (CIK).
5274  * Returns true if the engine is locked, false if not.
5275  */
cik_gfx_is_lockup(struct radeon_device * rdev,struct radeon_ring * ring)5276 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5277 {
5278 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5279 
5280 	if (!(reset_mask & (RADEON_RESET_GFX |
5281 			    RADEON_RESET_COMPUTE |
5282 			    RADEON_RESET_CP))) {
5283 		radeon_ring_lockup_update(rdev, ring);
5284 		return false;
5285 	}
5286 	return radeon_ring_test_lockup(rdev, ring);
5287 }
5288 
5289 /* MC */
5290 /**
5291  * cik_mc_program - program the GPU memory controller
5292  *
5293  * @rdev: radeon_device pointer
5294  *
5295  * Set the location of vram, gart, and AGP in the GPU's
5296  * physical address space (CIK).
5297  */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop display/memory clients before reprogramming the aperture */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs top (bits 31:16) and base (15:0) in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable the AGP aperture (BOT > TOP) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5345 
5346 /**
5347  * cik_mc_init - initialize the memory controller driver params
5348  *
5349  * @rdev: radeon_device pointer
5350  *
5351  * Look up the amount of vram, vram width, and decide how to place
5352  * vram and gart within the GPU's physical address space (CIK).
5353  * Returns 0 for success.
5354  */
cik_mc_init(struct radeon_device * rdev)5355 static int cik_mc_init(struct radeon_device *rdev)
5356 {
5357 	u32 tmp;
5358 	int chansize, numchan;
5359 
5360 	/* Get VRAM informations */
5361 	rdev->mc.vram_is_ddr = true;
5362 	tmp = RREG32(MC_ARB_RAMCFG);
5363 	if (tmp & CHANSIZE_MASK) {
5364 		chansize = 64;
5365 	} else {
5366 		chansize = 32;
5367 	}
5368 	tmp = RREG32(MC_SHARED_CHMAP);
5369 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5370 	case 0:
5371 	default:
5372 		numchan = 1;
5373 		break;
5374 	case 1:
5375 		numchan = 2;
5376 		break;
5377 	case 2:
5378 		numchan = 4;
5379 		break;
5380 	case 3:
5381 		numchan = 8;
5382 		break;
5383 	case 4:
5384 		numchan = 3;
5385 		break;
5386 	case 5:
5387 		numchan = 6;
5388 		break;
5389 	case 6:
5390 		numchan = 10;
5391 		break;
5392 	case 7:
5393 		numchan = 12;
5394 		break;
5395 	case 8:
5396 		numchan = 16;
5397 		break;
5398 	}
5399 	rdev->mc.vram_width = numchan * chansize;
5400 	/* Could aper size report 0 ? */
5401 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5402 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5403 	/* size in MB on si */
5404 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5405 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5406 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5407 	si_vram_gtt_location(rdev, &rdev->mc);
5408 	radeon_update_bandwidth_info(rdev);
5409 
5410 	return 0;
5411 }
5412 
5413 /*
5414  * GART
5415  * VMID 0 is the physical GPU addresses as used by the kernel.
5416  * VMIDs 1-15 are used for userspace clients and are handled
5417  * by the radeon vm/hsa code.
5418  */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache so updated PTEs are visible before invalidating */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; only context 0 is invalidated here */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5434 
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	/* pin the GART table in VRAM so gart.table_addr is valid below */
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 (kernel VMID: covers the GTT aperture) */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): raw register offsets, cleared with no named define;
	 * purpose not documented here — confirm against register specs
	 * before touching. */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* contexts 1-7 and 8-15 live in two separate register banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		/* Kaveri must not bypass the VM on the common hub */
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		/* program the per-VMID SH_MEM/SDMA state via SRBM banking */
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	/* restore SRBM selection to VMID 0 */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5555 
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page table (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save the per-VMID page table base addresses so
	 * cik_pcie_gart_enable() can restore them on resume */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* the table is no longer referenced by hw; unpin it */
	radeon_gart_table_vram_unpin(rdev);
}
5594 
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 * Order matters: hw must be disabled before the table memory
 * and the gart bookkeeping are released.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5608 
5609 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0 (intentional: validation happens in hardware).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5622 
5623 /*
5624  * vm
5625  * VMID 0 is the physical GPU addresses as used by the kernel.
5626  * VMIDs 1-15 are used for userspace clients and are handled
5627  * by the radeon vm/hsa code.
5628  */
5629 /**
5630  * cik_vm_init - cik vm init callback
5631  *
5632  * @rdev: radeon_device pointer
5633  *
5634  * Inits cik specific vm parameters (number of VMs, base of vram for
5635  * VMIDs 1-15) (CIK).
5636  * Returns 0 for success.
5637  */
cik_vm_init(struct radeon_device * rdev)5638 int cik_vm_init(struct radeon_device *rdev)
5639 {
5640 	/*
5641 	 * number of VMs
5642 	 * VMID 0 is reserved for System
5643 	 * radeon graphics/compute will use VMIDs 1-15
5644 	 */
5645 	rdev->vm_manager.nvm = 16;
5646 	/* base offset of vram pages */
5647 	if (rdev->flags & RADEON_IS_IGP) {
5648 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5649 		tmp <<= 22;
5650 		rdev->vm_manager.vram_base_offset = tmp;
5651 	} else
5652 		rdev->vm_manager.vram_base_offset = 0;
5653 
5654 	return 0;
5655 }
5656 
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 * Intentionally empty: CIK has no asic-specific VM state to release.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
5667 
5668 /**
5669  * cik_vm_decode_fault - print human readable fault info
5670  *
5671  * @rdev: radeon_device pointer
5672  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5673  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5674  *
5675  * Print human readable fault information (CIK).
5676  */
cik_vm_decode_fault(struct radeon_device * rdev,u32 status,u32 addr,u32 mc_client)5677 static void cik_vm_decode_fault(struct radeon_device *rdev,
5678 				u32 status, u32 addr, u32 mc_client)
5679 {
5680 	u32 mc_id;
5681 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5682 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5683 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5684 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5685 
5686 	if (rdev->family == CHIP_HAWAII)
5687 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5688 	else
5689 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5690 
5691 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5692 	       protections, vmid, addr,
5693 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5694 	       block, mc_client, mc_id);
5695 }
5696 
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush packets on
 * @vm_id: VMID whose page table base is being updated (0-15)
 * @pd_addr: new page directory base address
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the gfx ring has a PFP; compute rings use ME only */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* write the new page directory base for this VMID */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	/* VMIDs 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	/* bank the SRBM onto this VMID before touching SH_MEM state */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	/* four consecutive SH_MEM registers written in one packet */
	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* restore SRBM selection to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
5778 
5779 /*
5780  * RLC
5781  * The RLC is a multi-purpose microengine that handles a
5782  * variety of functions, the most important of which is
5783  * the interrupt controller.
5784  */
/* Toggle the GUI-idle (context busy/empty) interrupt sources on ring 0. */
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable)
{
	const u32 bits = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
	u32 val = RREG32(CP_INT_CNTL_RING0);

	val = enable ? (val | bits) : (val & ~bits);
	WREG32(CP_INT_CNTL_RING0, val);
}
5796 
/* Toggle RLC load balancing (LBPW) via RLC_LB_CNTL. */
static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
{
	u32 val = RREG32(RLC_LB_CNTL);

	val = enable ? (val | LOAD_BALANCE_ENABLE)
		     : (val & ~LOAD_BALANCE_ENABLE);
	WREG32(RLC_LB_CNTL, val);
}
5808 
/* Busy-poll (up to rdev->usec_timeout iterations of udelay(1) per unit)
 * until the RLC serdes report idle: first the per-CU masters of each
 * SE/SH pair, then the non-CU masters (SE/GC/TC). SE/SH selection is
 * restored to broadcast before the second wait. */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			/* wait for this SE/SH's CU master to go idle */
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* back to broadcast selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
5833 
/* Write RLC_CNTL only if the requested value differs from the current one,
 * avoiding a redundant register write. */
static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
{
	if (RREG32(RLC_CNTL) != rlc)
		WREG32(RLC_CNTL, rlc);
}
5842 
/* Disable the RLC if it is currently running, then wait for the RLC GPM
 * and serdes to go idle. Returns the original RLC_CNTL value so the
 * caller can restore it later via cik_update_rlc(). */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait (bounded) for the RLC GPM to drain */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
5866 
/* Request that the RLC enter safe mode, then busy-wait (bounded by
 * rdev->usec_timeout) first for GFX power/clock status to assert and
 * then for the RLC to acknowledge the request by clearing REQ. */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* post the enter-safe-mode message */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to consume the request (REQ bit clears) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5887 
cik_exit_rlc_safe_mode(struct radeon_device * rdev)5888 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5889 {
5890 	u32 tmp;
5891 
5892 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5893 	WREG32(RLC_GPR_REG2, tmp);
5894 }
5895 
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK): clear RLC_CNTL, mask the
 * GUI-idle interrupt, then wait for the serdes to go idle.
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	cik_wait_for_rlc_serdes(rdev);
}
5911 
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK), re-enable the GUI-idle
 * interrupt, and give the engine a short settle delay.
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5927 
/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	if (!rdev->rlc_fw)
		return -EINVAL;

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* broadcast to all SE/SH before programming load-balance params */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware carries a header with size/version;
		 * payload words are little-endian */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: raw big-endian words, size known per chip */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
6017 
/* Enable/disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS). Enabling requires halting the RLC, programming
 * the serdes override, and restoring the RLC before setting the
 * CGCG/CGLS enable bits. */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while reprogramming the serdes */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		/* restore the saved RLC_CNTL */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): four back-to-back reads; presumably a
		 * hw flush/settle requirement — matches upstream, do not
		 * remove without confirming */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* only write back when something actually changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6053 
/* Enable/disable medium-grain clock gating (MGCG) and the related
 * memory light-sleep and CGTS features, gated on the corresponding
 * rdev->cg_flags support bits. Both paths halt the RLC around the
 * serdes override programming and restore it afterwards. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		/* set bit 0, clear bit 1 */
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* force both override bits on to disable MGCG */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC and CP memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
6133 
/* Memory-controller registers carrying the per-block clock-gating and
 * light-sleep enable bits, iterated by cik_enable_mc_ls() and
 * cik_enable_mc_mgcg(). */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6146 
/* Toggle the MC_LS_ENABLE bit in every MC clock-gating register,
 * writing back only the registers that actually change. */
static void cik_enable_mc_ls(struct radeon_device *rdev,
			     bool enable)
{
	const bool set = enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS);
	u32 old, val;
	int i;

	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
		old = val = RREG32(mc_cg_registers[i]);
		if (set)
			val |= MC_LS_ENABLE;
		else
			val &= ~MC_LS_ENABLE;
		if (val != old)
			WREG32(mc_cg_registers[i], val);
	}
}
6163 
/* Toggle the MC_CG_ENABLE bit in every MC clock-gating register,
 * writing back only the registers that actually change. */
static void cik_enable_mc_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	const bool set = enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG);
	u32 old, val;
	int i;

	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
		old = val = RREG32(mc_cg_registers[i]);
		if (set)
			val |= MC_CG_ENABLE;
		else
			val &= ~MC_CG_ENABLE;
		if (val != old)
			WREG32(mc_cg_registers[i], val);
	}
}
6180 
/* Toggle medium-grain clock gating on both SDMA instances.
 * Enable writes a fixed 0x100; disable ORs in the 0xff000000
 * override bits, skipping the write when nothing changes. */
static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
				 bool enable)
{
	static const u32 instances[] = {
		SDMA0_REGISTER_OFFSET, SDMA1_REGISTER_OFFSET
	};
	u32 old, val;
	int i;

	for (i = 0; i < ARRAY_SIZE(instances); i++) {
		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
			WREG32(SDMA0_CLK_CTRL + instances[i], 0x00000100);
		} else {
			old = val = RREG32(SDMA0_CLK_CTRL + instances[i]);
			val |= 0xff000000;
			if (val != old)
				WREG32(SDMA0_CLK_CTRL + instances[i], val);
		}
	}
}
6201 
/* Toggle bit 0x100 (memory light sleep) in SDMA0_POWER_CNTL for both
 * SDMA instances, writing back only on change. */
static void cik_enable_sdma_mgls(struct radeon_device *rdev,
				 bool enable)
{
	static const u32 instances[] = {
		SDMA0_REGISTER_OFFSET, SDMA1_REGISTER_OFFSET
	};
	const bool set = enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS);
	u32 old, val;
	int i;

	for (i = 0; i < ARRAY_SIZE(instances); i++) {
		old = val = RREG32(SDMA0_POWER_CNTL + instances[i]);
		if (set)
			val |= 0x100;
		else
			val &= ~0x100;
		if (old != val)
			WREG32(SDMA0_POWER_CNTL + instances[i], val);
	}
}
6229 
/* Toggle UVD medium-grain clock gating: program the UVD context-space
 * memory-gating mask and the DCM bit in UVD_CGC_CTRL. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): the value read here is immediately
		 * overwritten with 0xfff; the read may be required to
		 * latch the indirect register — matches upstream,
		 * confirm before simplifying */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6255 
/* Toggle the BIF (PCIE) memory light-sleep enables in PCIE_CNTL2,
 * writing back only when the value changes. */
static void cik_enable_bif_mgls(struct radeon_device *rdev,
			       bool enable)
{
	const u32 ls_bits = SLV_MEM_LS_EN | MST_MEM_LS_EN |
			    REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
	u32 old, val;

	old = val = RREG32_PCIE_PORT(PCIE_CNTL2);
	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
		val |= ls_bits;
	else
		val &= ~ls_bits;

	if (old != val)
		WREG32_PCIE_PORT(PCIE_CNTL2, val);
}
6273 
/* Toggle HDP medium-grain clock gating. Note the bit is a DISABLE
 * flag: cleared to gate, set to stop gating. */
static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
				bool enable)
{
	const bool gate = enable &&
		(rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG);
	u32 old, val;

	old = val = RREG32(HDP_HOST_PATH_CNTL);
	if (gate)
		val &= ~CLOCK_GATING_DIS;
	else
		val |= CLOCK_GATING_DIS;

	if (old != val)
		WREG32(HDP_HOST_PATH_CNTL, val);
}
6289 
/* Toggle HDP memory light sleep, writing back only on change. */
static void cik_enable_hdp_ls(struct radeon_device *rdev,
			      bool enable)
{
	const bool set = enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS);
	u32 old, val;

	old = val = RREG32(HDP_MEM_POWER_LS);
	if (set)
		val |= HDP_LS_ENABLE;
	else
		val &= ~HDP_LS_ENABLE;

	if (old != val)
		WREG32(HDP_MEM_POWER_LS, val);
}
6305 
/* Update clock gating for the blocks selected in the 'block' bitmask.
 * GFX gating is sequenced with the GUI-idle interrupt masked; MGCG and
 * CGCG must be toggled in opposite order for enable vs. disable. */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating only applies to discrete parts, not IGPs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6353 
/* Enable clock gating for all supported blocks: GFX first, then UVD
 * internal CG (if present), then the remaining blocks in one call. */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6368 
/* Disable clock gating in the reverse order of cik_init_cg():
 * the non-GFX blocks first, GFX last. */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6379 
/*
 * Allow the SMU clock to be slowed down on RLC power-up, when the
 * RLC/SMU handshake is supported (RADEON_PG_SUPPORT_RLC_SMU_HS).
 * Read-modify-write of RLC_PG_CNTL; written back only if it changed.
 */
static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
					  bool enable)
{
	u32 val, old;

	old = val = RREG32(RLC_PG_CNTL);
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
		val |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
	else
		val &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
	if (val != old)
		WREG32(RLC_PG_CNTL, val);
}
6393 
/*
 * Allow the SMU clock to be slowed down on RLC power-down, when the
 * RLC/SMU handshake is supported (RADEON_PG_SUPPORT_RLC_SMU_HS).
 * Read-modify-write of RLC_PG_CNTL; written back only if it changed.
 */
static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
					  bool enable)
{
	u32 val, old;

	old = val = RREG32(RLC_PG_CNTL);
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
		val |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
	else
		val &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
	if (val != old)
		WREG32(RLC_PG_CNTL, val);
}
6407 
/*
 * Enable/disable CP powergating.  The RLC_PG_CNTL bit is a *disable*
 * bit (DISABLE_CP_PG), so it is cleared to enable PG and set to
 * disable it; written back only if the value changed.
 */
static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
{
	u32 val, old;

	old = val = RREG32(RLC_PG_CNTL);
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
		val &= ~DISABLE_CP_PG;
	else
		val |= DISABLE_CP_PG;
	if (val != old)
		WREG32(RLC_PG_CNTL, val);
}
6420 
/*
 * Enable/disable GDS powergating.  As with CP PG, the RLC_PG_CNTL
 * bit is a *disable* bit (DISABLE_GDS_PG): cleared to enable,
 * set to disable; written back only if the value changed.
 */
static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
{
	u32 val, old;

	old = val = RREG32(RLC_PG_CNTL);
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
		val &= ~DISABLE_GDS_PG;
	else
		val |= DISABLE_GDS_PG;
	if (val != old)
		WREG32(RLC_PG_CNTL, val);
}
6433 
6434 #define CP_ME_TABLE_SIZE    96
6435 #define CP_ME_TABLE_OFFSET  2048
6436 #define CP_MEC_TABLE_OFFSET 4096
6437 
/**
 * cik_init_cp_pg_table - populate the RLC CP powergating jump table
 *
 * @rdev: radeon_device pointer
 *
 * Copies the per-microengine jump tables (CE, PFP, ME, MEC, and MEC2
 * on Kaveri) out of the CP firmware images into the RLC cp_table
 * buffer, packed back to back.  No-op if the table buffer has not
 * been mapped (rdev->rlc.cp_table_ptr == NULL).
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Kaveri carries a second MEC, hence a fifth microengine table */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			/* new-style firmware: jump-table offset/size come from
			 * the gfx firmware header of each image (little-endian) */
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				/* me == 4: MEC2 (Kaveri only, see max_me above) */
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* legacy firmware: fixed table size and offsets, and the
			 * image words are stored big-endian */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6521 
/*
 * Enable/disable coarse grain GFX powergating: sets/clears
 * GFX_PG_ENABLE in RLC_PG_CNTL and AUTO_PG_EN in RLC_AUTO_PG_CTRL,
 * writing each register back only if its value changed.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* result unused; presumably a flushing/posting read kept
		 * from upstream — NOTE(review): confirm before removing */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6551 
/*
 * Return a bitmask of the active CUs in shader array @sh of shader
 * engine @se: reads the hw and user shader-array configs for that
 * SE/SH (restoring broadcast select afterwards), combines the two
 * inactive-CU fields, and masks down to max_cu_per_sh bits.
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 cfg, user_cfg, disabled, valid_mask = 0;
	u32 cu;

	cik_select_se_sh(rdev, se, sh);
	cfg = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	user_cfg = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* inactive-CU bits live in the upper 16 bits of both registers */
	disabled = ((cfg & 0xffff0000) | user_cfg) >> 16;

	/* build a mask covering exactly max_cu_per_sh bits */
	for (cu = 0; cu < rdev->config.cik.max_cu_per_sh; cu++)
		valid_mask = (valid_mask << 1) | 1;

	return ~disabled & valid_mask;
}
6574 
cik_init_ao_cu_mask(struct radeon_device * rdev)6575 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6576 {
6577 	u32 i, j, k, active_cu_number = 0;
6578 	u32 mask, counter, cu_bitmap;
6579 	u32 tmp = 0;
6580 
6581 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6582 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6583 			mask = 1;
6584 			cu_bitmap = 0;
6585 			counter = 0;
6586 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6587 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6588 					if (counter < 2)
6589 						cu_bitmap |= mask;
6590 					counter ++;
6591 				}
6592 				mask <<= 1;
6593 			}
6594 
6595 			active_cu_number += counter;
6596 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6597 		}
6598 	}
6599 
6600 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6601 
6602 	tmp = RREG32(RLC_MAX_PG_CU);
6603 	tmp &= ~MAX_PU_CU_MASK;
6604 	tmp |= MAX_PU_CU(active_cu_number);
6605 	WREG32(RLC_MAX_PG_CU, tmp);
6606 }
6607 
/*
 * Enable/disable static per-CU medium grain GFX powergating via the
 * STATIC_PER_CU_PG_ENABLE bit in RLC_PG_CNTL; written back only if
 * the value changed.
 */
static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
				       bool enable)
{
	u32 val, old;

	old = val = RREG32(RLC_PG_CNTL);
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
		val |= STATIC_PER_CU_PG_ENABLE;
	else
		val &= ~STATIC_PER_CU_PG_ENABLE;
	if (val != old)
		WREG32(RLC_PG_CNTL, val);
}
6621 
/*
 * Enable/disable dynamic per-CU medium grain GFX powergating via the
 * DYN_PER_CU_PG_ENABLE bit in RLC_PG_CNTL; written back only if the
 * value changed.
 */
static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
					bool enable)
{
	u32 val, old;

	old = val = RREG32(RLC_PG_CNTL);
	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
		val |= DYN_PER_CU_PG_ENABLE;
	else
		val &= ~DYN_PER_CU_PG_ENABLE;
	if (val != old)
		WREG32(RLC_PG_CNTL, val);
}
6635 
6636 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6637 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6638 
/*
 * One-time setup for GFX coarse grain powergating: publishes the
 * clear-state descriptor and save/restore register list through the
 * RLC scratch interface, points the RLC at the save/restore and CP
 * table buffers, and programs the PG delay/idle-poll parameters.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: hi/lo GPU address + size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear-state data: zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* buffer addresses are programmed as 256-byte aligned (>> 8) */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6687 
/* Toggle all three GFX powergating mechanisms (CGPG, static and
 * dynamic per-CU MGPG) together. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6694 
cik_get_csb_size(struct radeon_device * rdev)6695 u32 cik_get_csb_size(struct radeon_device *rdev)
6696 {
6697 	u32 count = 0;
6698 	const struct cs_section_def *sect = NULL;
6699 	const struct cs_extent_def *ext = NULL;
6700 
6701 	if (rdev->rlc.cs_data == NULL)
6702 		return 0;
6703 
6704 	/* begin clear state */
6705 	count += 2;
6706 	/* context control state */
6707 	count += 3;
6708 
6709 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6710 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6711 			if (sect->id == SECT_CONTEXT)
6712 				count += 2 + ext->reg_count;
6713 			else
6714 				return 0;
6715 		}
6716 	}
6717 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6718 	count += 4;
6719 	/* end clear state */
6720 	count += 2;
6721 	/* clear state */
6722 	count += 2;
6723 
6724 	return count;
6725 }
6726 
/**
 * cik_get_csb_buffer - fill the clear state indirect buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: destination for the packet stream (little-endian dwords)
 *
 * Emits the preamble/context-control packets, all SECT_CONTEXT register
 * extents from rdev->rlc.cs_data, the per-family PA_SC_RASTER_CONFIG
 * pair, and the trailing CLEAR_STATE packet.  The layout must stay in
 * sync with the dword count computed by cik_get_csb_size().
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* one SET_CONTEXT_REG packet per context-section extent */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* reg_index is absolute; packet wants the
				 * context-reg-space relative offset */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only SECT_CONTEXT is supported here */
				return;
			}
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1: per-family values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6791 
/*
 * Enable powergating if any pg_flags are set: slowdown on power
 * up/down first, then (for GFX PG capable parts) the CGPG setup and
 * CP/GDS PG, the always-on CU mask, and finally the GFX PG toggles.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6806 
/*
 * Disable powergating: GFX PG first, then CP/GDS PG on GFX PG
 * capable parts (reverse of cik_init_pg()).
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6817 
6818 /*
6819  * Interrupts
6820  * Starting with r6xx, interrupts are handled via a ring buffer.
6821  * Ring buffers are areas of GPU accessible memory that the GPU
6822  * writes interrupt vectors into and the host reads vectors out of.
6823  * There is a rptr (read pointer) that determines where the
6824  * host is currently reading, and a wptr (write pointer)
6825  * which determines where the GPU has written.  When the
6826  * pointers are equal, the ring is idle.  When the GPU
6827  * writes vectors to the ring buffer, it increments the
6828  * wptr.  When there is an interrupt, the host then starts
6829  * fetching commands and processing them until the pointers are
6830  * equal again at which point it updates the rptr.
6831  */
6832 
6833 /**
6834  * cik_enable_interrupts - Enable the interrupt ring buffer
6835  *
6836  * @rdev: radeon_device pointer
6837  *
6838  * Enable the interrupt ring buffer (CIK).
6839  */
static void cik_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	/* turn on both the IH itself and its ring buffer */
	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	/* mirror the hw state in the driver */
	rdev->ih.enabled = true;
}
6851 
6852 /**
6853  * cik_disable_interrupts - Disable the interrupt ring buffer
6854  *
6855  * @rdev: radeon_device pointer
6856  *
6857  * Disable the interrupt ring buffer (CIK).
6858  */
static void cik_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	/* disable the ring buffer and the IH */
	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	/* mirror the hw state in the driver */
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}
6874 
6875 /**
6876  * cik_disable_interrupt_state - Disable all interrupt sources
6877  *
6878  * @rdev: radeon_device pointer
6879  *
6880  * Clear all interrupt enable bits used by the driver (CIK).
6881  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty bits */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma: clear the trap enable on both engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues: all pipes of both compute MEs */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: clear the enables, keep the polarity bits */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6951 
6952 /**
6953  * cik_irq_init - init and enable the interrupt ring
6954  *
6955  * @rdev: radeon_device pointer
6956  *
6957  * Allocate a ring buffer for the interrupt controller,
6958  * enable the RLC, disable interrupts, enable the IH
6959  * ring buffer and enable it (CIK).
 * Called at device load and resume.
6961  * Returns 0 for success, errors for failure.
6962  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* undo the ring allocation on failure */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to dummy page address */
	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	/* ring base is 256-byte aligned */
	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
7033 
7034 /**
7035  * cik_irq_set - enable/disable interrupt sources
7036  *
7037  * @rdev: radeon_device pointer
7038  *
7039  * Enable interrupt sources on the GPU (vblanks, hpd,
7040  * etc.) (CIK).
7041  * Returns 0 for success, errors for failure.
7042  */
cik_irq_set(struct radeon_device * rdev)7043 int cik_irq_set(struct radeon_device *rdev)
7044 {
7045 	u32 cp_int_cntl;
7046 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7047 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7048 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7049 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7050 	u32 grbm_int_cntl = 0;
7051 	u32 dma_cntl, dma_cntl1;
7052 
7053 	if (!rdev->irq.installed) {
7054 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7055 		return -EINVAL;
7056 	}
7057 	/* don't enable anything if the ih is disabled */
7058 	if (!rdev->ih.enabled) {
7059 		cik_disable_interrupts(rdev);
7060 		/* force the active interrupt state to all disabled */
7061 		cik_disable_interrupt_state(rdev);
7062 		return 0;
7063 	}
7064 
7065 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7066 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7067 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7068 
7069 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7070 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7071 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7072 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7073 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7074 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7075 
7076 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7077 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7078 
7079 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7080 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7081 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7082 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7083 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7084 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7085 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7086 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7087 
7088 	/* enable CP interrupts on all rings */
7089 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7090 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7091 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7092 	}
7093 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7094 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7095 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7096 		if (ring->me == 1) {
7097 			switch (ring->pipe) {
7098 			case 0:
7099 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7100 				break;
7101 			case 1:
7102 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7103 				break;
7104 			case 2:
7105 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7106 				break;
7107 			case 3:
7108 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7109 				break;
7110 			default:
7111 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7112 				break;
7113 			}
7114 		} else if (ring->me == 2) {
7115 			switch (ring->pipe) {
7116 			case 0:
7117 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7118 				break;
7119 			case 1:
7120 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7121 				break;
7122 			case 2:
7123 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7124 				break;
7125 			case 3:
7126 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7127 				break;
7128 			default:
7129 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7130 				break;
7131 			}
7132 		} else {
7133 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7134 		}
7135 	}
7136 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7137 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7138 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7139 		if (ring->me == 1) {
7140 			switch (ring->pipe) {
7141 			case 0:
7142 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7143 				break;
7144 			case 1:
7145 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7146 				break;
7147 			case 2:
7148 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7149 				break;
7150 			case 3:
7151 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7152 				break;
7153 			default:
7154 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7155 				break;
7156 			}
7157 		} else if (ring->me == 2) {
7158 			switch (ring->pipe) {
7159 			case 0:
7160 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7161 				break;
7162 			case 1:
7163 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7164 				break;
7165 			case 2:
7166 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7167 				break;
7168 			case 3:
7169 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7170 				break;
7171 			default:
7172 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7173 				break;
7174 			}
7175 		} else {
7176 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7177 		}
7178 	}
7179 
7180 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7181 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7182 		dma_cntl |= TRAP_ENABLE;
7183 	}
7184 
7185 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7186 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7187 		dma_cntl1 |= TRAP_ENABLE;
7188 	}
7189 
7190 	if (rdev->irq.crtc_vblank_int[0] ||
7191 	    atomic_read(&rdev->irq.pflip[0])) {
7192 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7193 		crtc1 |= VBLANK_INTERRUPT_MASK;
7194 	}
7195 	if (rdev->irq.crtc_vblank_int[1] ||
7196 	    atomic_read(&rdev->irq.pflip[1])) {
7197 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7198 		crtc2 |= VBLANK_INTERRUPT_MASK;
7199 	}
7200 	if (rdev->irq.crtc_vblank_int[2] ||
7201 	    atomic_read(&rdev->irq.pflip[2])) {
7202 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7203 		crtc3 |= VBLANK_INTERRUPT_MASK;
7204 	}
7205 	if (rdev->irq.crtc_vblank_int[3] ||
7206 	    atomic_read(&rdev->irq.pflip[3])) {
7207 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7208 		crtc4 |= VBLANK_INTERRUPT_MASK;
7209 	}
7210 	if (rdev->irq.crtc_vblank_int[4] ||
7211 	    atomic_read(&rdev->irq.pflip[4])) {
7212 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7213 		crtc5 |= VBLANK_INTERRUPT_MASK;
7214 	}
7215 	if (rdev->irq.crtc_vblank_int[5] ||
7216 	    atomic_read(&rdev->irq.pflip[5])) {
7217 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7218 		crtc6 |= VBLANK_INTERRUPT_MASK;
7219 	}
7220 	if (rdev->irq.hpd[0]) {
7221 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7222 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7223 	}
7224 	if (rdev->irq.hpd[1]) {
7225 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7226 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7227 	}
7228 	if (rdev->irq.hpd[2]) {
7229 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7230 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7231 	}
7232 	if (rdev->irq.hpd[3]) {
7233 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7234 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7235 	}
7236 	if (rdev->irq.hpd[4]) {
7237 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7238 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7239 	}
7240 	if (rdev->irq.hpd[5]) {
7241 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7242 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7243 	}
7244 
7245 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7246 
7247 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7248 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7249 
7250 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7251 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7252 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7253 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7254 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7255 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7256 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7257 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7258 
7259 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7260 
7261 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7262 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7263 	if (rdev->num_crtc >= 4) {
7264 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7265 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7266 	}
7267 	if (rdev->num_crtc >= 6) {
7268 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7269 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7270 	}
7271 
7272 	if (rdev->num_crtc >= 2) {
7273 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7274 		       GRPH_PFLIP_INT_MASK);
7275 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7276 		       GRPH_PFLIP_INT_MASK);
7277 	}
7278 	if (rdev->num_crtc >= 4) {
7279 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7280 		       GRPH_PFLIP_INT_MASK);
7281 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7282 		       GRPH_PFLIP_INT_MASK);
7283 	}
7284 	if (rdev->num_crtc >= 6) {
7285 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7286 		       GRPH_PFLIP_INT_MASK);
7287 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7288 		       GRPH_PFLIP_INT_MASK);
7289 	}
7290 
7291 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7292 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7293 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7294 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7295 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7296 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7297 
7298 	/* posting read */
7299 	RREG32(SRBM_STATUS);
7300 
7301 	return 0;
7302 }
7303 
7304 /**
7305  * cik_irq_ack - ack interrupt sources
7306  *
7307  * @rdev: radeon_device pointer
7308  *
7309  * Ack interrupt sources on the GPU (vblanks, hpd,
7310  * etc.) (CIK).  Certain interrupts sources are sw
7311  * generated and do not require an explicit ack.
7312  */
cik_irq_ack(struct radeon_device * rdev)7313 static inline void cik_irq_ack(struct radeon_device *rdev)
7314 {
7315 	u32 tmp;
7316 
7317 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7318 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7319 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7320 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7321 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7322 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7323 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7324 
7325 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7326 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7327 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7328 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7329 	if (rdev->num_crtc >= 4) {
7330 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7331 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7332 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7333 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7334 	}
7335 	if (rdev->num_crtc >= 6) {
7336 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7337 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7338 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7339 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7340 	}
7341 
7342 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7343 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7344 		       GRPH_PFLIP_INT_CLEAR);
7345 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7346 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7347 		       GRPH_PFLIP_INT_CLEAR);
7348 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7349 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7350 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7351 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7352 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7353 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7354 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7355 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7356 
7357 	if (rdev->num_crtc >= 4) {
7358 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7359 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7360 			       GRPH_PFLIP_INT_CLEAR);
7361 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7362 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7363 			       GRPH_PFLIP_INT_CLEAR);
7364 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7365 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7366 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7367 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7368 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7369 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7370 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7371 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7372 	}
7373 
7374 	if (rdev->num_crtc >= 6) {
7375 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7376 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7377 			       GRPH_PFLIP_INT_CLEAR);
7378 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7379 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7380 			       GRPH_PFLIP_INT_CLEAR);
7381 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7382 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7383 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7384 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7385 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7386 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7387 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7388 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7389 	}
7390 
7391 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7392 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7393 		tmp |= DC_HPDx_INT_ACK;
7394 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7395 	}
7396 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7397 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7398 		tmp |= DC_HPDx_INT_ACK;
7399 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7400 	}
7401 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7402 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7403 		tmp |= DC_HPDx_INT_ACK;
7404 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7405 	}
7406 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7407 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7408 		tmp |= DC_HPDx_INT_ACK;
7409 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7410 	}
7411 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7412 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7413 		tmp |= DC_HPDx_INT_ACK;
7414 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7415 	}
7416 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7417 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7418 		tmp |= DC_HPDx_INT_ACK;
7419 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7420 	}
7421 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7422 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7423 		tmp |= DC_HPDx_RX_INT_ACK;
7424 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7425 	}
7426 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7427 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7428 		tmp |= DC_HPDx_RX_INT_ACK;
7429 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7430 	}
7431 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7432 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7433 		tmp |= DC_HPDx_RX_INT_ACK;
7434 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7435 	}
7436 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7437 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7438 		tmp |= DC_HPDx_RX_INT_ACK;
7439 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7440 	}
7441 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7442 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7443 		tmp |= DC_HPDx_RX_INT_ACK;
7444 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7445 	}
7446 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7447 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7448 		tmp |= DC_HPDx_RX_INT_ACK;
7449 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7450 	}
7451 }
7452 
7453 /**
7454  * cik_irq_disable - disable interrupts
7455  *
7456  * @rdev: radeon_device pointer
7457  *
7458  * Disable interrupts on the hw (CIK).
7459  */
cik_irq_disable(struct radeon_device * rdev)7460 static void cik_irq_disable(struct radeon_device *rdev)
7461 {
7462 	cik_disable_interrupts(rdev);
7463 	/* Wait and acknowledge irq */
7464 	mdelay(1);
7465 	cik_irq_ack(rdev);
7466 	cik_disable_interrupt_state(rdev);
7467 }
7468 
7469 /**
7470  * cik_irq_disable - disable interrupts for suspend
7471  *
7472  * @rdev: radeon_device pointer
7473  *
7474  * Disable interrupts and stop the RLC (CIK).
7475  * Used for suspend.
7476  */
cik_irq_suspend(struct radeon_device * rdev)7477 static void cik_irq_suspend(struct radeon_device *rdev)
7478 {
7479 	cik_irq_disable(rdev);
7480 	cik_rlc_stop(rdev);
7481 }
7482 
7483 /**
7484  * cik_irq_fini - tear down interrupt support
7485  *
7486  * @rdev: radeon_device pointer
7487  *
7488  * Disable interrupts on the hw and free the IH ring
7489  * buffer (CIK).
7490  * Used for driver unload.
7491  */
cik_irq_fini(struct radeon_device * rdev)7492 static void cik_irq_fini(struct radeon_device *rdev)
7493 {
7494 	cik_irq_suspend(rdev);
7495 	r600_ih_ring_fini(rdev);
7496 }
7497 
7498 /**
7499  * cik_get_ih_wptr - get the IH ring buffer wptr
7500  *
7501  * @rdev: radeon_device pointer
7502  *
7503  * Get the IH ring buffer wptr from either the register
7504  * or the writeback memory buffer (CIK).  Also check for
7505  * ring buffer overflow and deal with it.
7506  * Used by cik_irq_process().
7507  * Returns the value of the wptr.
7508  */
cik_get_ih_wptr(struct radeon_device * rdev)7509 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7510 {
7511 	u32 wptr, tmp;
7512 
7513 	if (rdev->wb.enabled)
7514 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7515 	else
7516 		wptr = RREG32(IH_RB_WPTR);
7517 
7518 	if (wptr & RB_OVERFLOW) {
7519 		wptr &= ~RB_OVERFLOW;
7520 		/* When a ring buffer overflow happen start parsing interrupt
7521 		 * from the last not overwritten vector (wptr + 16). Hopefully
7522 		 * this should allow us to catchup.
7523 		 */
7524 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7525 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7526 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7527 		tmp = RREG32(IH_RB_CNTL);
7528 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7529 		WREG32(IH_RB_CNTL, tmp);
7530 	}
7531 	return (wptr & rdev->ih.ptr_mask);
7532 }
7533 
7534 /*        CIK IV Ring
7535  * Each IV ring entry is 128 bits:
7536  * [7:0]    - interrupt source id
7537  * [31:8]   - reserved
7538  * [59:32]  - interrupt source data
7539  * [63:60]  - reserved
7540  * [71:64]  - RINGID
7541  *            CP:
7542  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7543  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7544  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7545  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7546  *            PIPE_ID - ME0 0=3D
7547  *                    - ME1&2 compute dispatcher (4 pipes each)
7548  *            SDMA:
7549  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7550  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7551  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7552  * [79:72]  - VMID
7553  * [95:80]  - PASID
7554  * [127:96] - reserved
7555  */
7556 /**
7557  * cik_irq_process - interrupt handler
7558  *
7559  * @rdev: radeon_device pointer
7560  *
7561  * Interrupt hander (CIK).  Walk the IH ring,
7562  * ack interrupts and schedule work to handle
7563  * interrupt events.
7564  * Returns irq process return code.
7565  */
cik_irq_process(struct radeon_device * rdev)7566 int cik_irq_process(struct radeon_device *rdev)
7567 {
7568 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7569 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7570 	u32 wptr;
7571 	u32 rptr;
7572 	u32 src_id, src_data, ring_id;
7573 	u8 me_id, pipe_id, queue_id;
7574 	u32 ring_index;
7575 	bool queue_hotplug = false;
7576 	bool queue_dp = false;
7577 	bool queue_reset = false;
7578 	u32 addr, status, mc_client;
7579 	bool queue_thermal = false;
7580 
7581 	if (!rdev->ih.enabled || rdev->shutdown)
7582 		return IRQ_NONE;
7583 
7584 	wptr = cik_get_ih_wptr(rdev);
7585 
7586 restart_ih:
7587 	/* is somebody else already processing irqs? */
7588 	if (atomic_xchg(&rdev->ih.lock, 1))
7589 		return IRQ_NONE;
7590 
7591 	rptr = rdev->ih.rptr;
7592 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7593 
7594 	/* Order reading of wptr vs. reading of IH ring data */
7595 	rmb();
7596 
7597 	/* display interrupts */
7598 	cik_irq_ack(rdev);
7599 
7600 	while (rptr != wptr) {
7601 		/* wptr/rptr are in bytes! */
7602 		ring_index = rptr / 4;
7603 
7604 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7605 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7606 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7607 
7608 		switch (src_id) {
7609 		case 1: /* D1 vblank/vline */
7610 			switch (src_data) {
7611 			case 0: /* D1 vblank */
7612 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7613 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7614 
7615 				if (rdev->irq.crtc_vblank_int[0]) {
7616 					drm_handle_vblank(rdev->ddev, 0);
7617 #ifdef __NetBSD__
7618 						spin_lock(&rdev->irq.vblank_lock);
7619 						rdev->pm.vblank_sync = true;
7620 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
7621 						spin_unlock(&rdev->irq.vblank_lock);
7622 #else
7623 					rdev->pm.vblank_sync = true;
7624 					wake_up(&rdev->irq.vblank_queue);
7625 #endif
7626 				}
7627 				if (atomic_read(&rdev->irq.pflip[0]))
7628 					radeon_crtc_handle_vblank(rdev, 0);
7629 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7630 				DRM_DEBUG("IH: D1 vblank\n");
7631 
7632 				break;
7633 			case 1: /* D1 vline */
7634 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7635 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7636 
7637 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7638 				DRM_DEBUG("IH: D1 vline\n");
7639 
7640 				break;
7641 			default:
7642 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7643 				break;
7644 			}
7645 			break;
7646 		case 2: /* D2 vblank/vline */
7647 			switch (src_data) {
7648 			case 0: /* D2 vblank */
7649 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7650 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7651 
7652 				if (rdev->irq.crtc_vblank_int[1]) {
7653 					drm_handle_vblank(rdev->ddev, 1);
7654 #ifdef __NetBSD__
7655 						spin_lock(&rdev->irq.vblank_lock);
7656 						rdev->pm.vblank_sync = true;
7657 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
7658 						spin_unlock(&rdev->irq.vblank_lock);
7659 #else
7660 					rdev->pm.vblank_sync = true;
7661 					wake_up(&rdev->irq.vblank_queue);
7662 #endif
7663 				}
7664 				if (atomic_read(&rdev->irq.pflip[1]))
7665 					radeon_crtc_handle_vblank(rdev, 1);
7666 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7667 				DRM_DEBUG("IH: D2 vblank\n");
7668 
7669 				break;
7670 			case 1: /* D2 vline */
7671 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7672 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7673 
7674 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7675 				DRM_DEBUG("IH: D2 vline\n");
7676 
7677 				break;
7678 			default:
7679 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7680 				break;
7681 			}
7682 			break;
7683 		case 3: /* D3 vblank/vline */
7684 			switch (src_data) {
7685 			case 0: /* D3 vblank */
7686 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7687 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7688 
7689 				if (rdev->irq.crtc_vblank_int[2]) {
7690 					drm_handle_vblank(rdev->ddev, 2);
7691 #ifdef __NetBSD__
7692 						spin_lock(&rdev->irq.vblank_lock);
7693 						rdev->pm.vblank_sync = true;
7694 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
7695 						spin_unlock(&rdev->irq.vblank_lock);
7696 #else
7697 					rdev->pm.vblank_sync = true;
7698 					wake_up(&rdev->irq.vblank_queue);
7699 #endif
7700 				}
7701 				if (atomic_read(&rdev->irq.pflip[2]))
7702 					radeon_crtc_handle_vblank(rdev, 2);
7703 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7704 				DRM_DEBUG("IH: D3 vblank\n");
7705 
7706 				break;
7707 			case 1: /* D3 vline */
7708 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7709 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7710 
7711 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7712 				DRM_DEBUG("IH: D3 vline\n");
7713 
7714 				break;
7715 			default:
7716 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7717 				break;
7718 			}
7719 			break;
7720 		case 4: /* D4 vblank/vline */
7721 			switch (src_data) {
7722 			case 0: /* D4 vblank */
7723 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7724 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7725 
7726 				if (rdev->irq.crtc_vblank_int[3]) {
7727 					drm_handle_vblank(rdev->ddev, 3);
7728 #ifdef __NetBSD__
7729 						spin_lock(&rdev->irq.vblank_lock);
7730 						rdev->pm.vblank_sync = true;
7731 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
7732 						spin_unlock(&rdev->irq.vblank_lock);
7733 #else
7734 					rdev->pm.vblank_sync = true;
7735 					wake_up(&rdev->irq.vblank_queue);
7736 #endif
7737 				}
7738 				if (atomic_read(&rdev->irq.pflip[3]))
7739 					radeon_crtc_handle_vblank(rdev, 3);
7740 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7741 				DRM_DEBUG("IH: D4 vblank\n");
7742 
7743 				break;
7744 			case 1: /* D4 vline */
7745 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7746 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7747 
7748 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7749 				DRM_DEBUG("IH: D4 vline\n");
7750 
7751 				break;
7752 			default:
7753 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7754 				break;
7755 			}
7756 			break;
7757 		case 5: /* D5 vblank/vline */
7758 			switch (src_data) {
7759 			case 0: /* D5 vblank */
7760 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7761 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7762 
7763 				if (rdev->irq.crtc_vblank_int[4]) {
7764 					drm_handle_vblank(rdev->ddev, 4);
7765 #ifdef __NetBSD__
7766 						spin_lock(&rdev->irq.vblank_lock);
7767 						rdev->pm.vblank_sync = true;
7768 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
7769 						spin_unlock(&rdev->irq.vblank_lock);
7770 #else
7771 					rdev->pm.vblank_sync = true;
7772 					wake_up(&rdev->irq.vblank_queue);
7773 #endif
7774 				}
7775 				if (atomic_read(&rdev->irq.pflip[4]))
7776 					radeon_crtc_handle_vblank(rdev, 4);
7777 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7778 				DRM_DEBUG("IH: D5 vblank\n");
7779 
7780 				break;
7781 			case 1: /* D5 vline */
7782 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7783 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7784 
7785 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7786 				DRM_DEBUG("IH: D5 vline\n");
7787 
7788 				break;
7789 			default:
7790 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7791 				break;
7792 			}
7793 			break;
7794 		case 6: /* D6 vblank/vline */
7795 			switch (src_data) {
7796 			case 0: /* D6 vblank */
7797 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7798 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7799 
7800 				if (rdev->irq.crtc_vblank_int[5]) {
7801 					drm_handle_vblank(rdev->ddev, 5);
7802 #ifdef __NetBSD__
7803 						spin_lock(&rdev->irq.vblank_lock);
7804 						rdev->pm.vblank_sync = true;
7805 						DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
7806 						spin_unlock(&rdev->irq.vblank_lock);
7807 #else
7808 					rdev->pm.vblank_sync = true;
7809 					wake_up(&rdev->irq.vblank_queue);
7810 #endif
7811 				}
7812 				if (atomic_read(&rdev->irq.pflip[5]))
7813 					radeon_crtc_handle_vblank(rdev, 5);
7814 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7815 				DRM_DEBUG("IH: D6 vblank\n");
7816 
7817 				break;
7818 			case 1: /* D6 vline */
7819 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7820 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7821 
7822 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7823 				DRM_DEBUG("IH: D6 vline\n");
7824 
7825 				break;
7826 			default:
7827 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7828 				break;
7829 			}
7830 			break;
7831 		case 8: /* D1 page flip */
7832 		case 10: /* D2 page flip */
7833 		case 12: /* D3 page flip */
7834 		case 14: /* D4 page flip */
7835 		case 16: /* D5 page flip */
7836 		case 18: /* D6 page flip */
7837 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7838 			if (radeon_use_pflipirq > 0)
7839 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7840 			break;
7841 		case 42: /* HPD hotplug */
7842 			switch (src_data) {
7843 			case 0:
7844 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7845 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7846 
7847 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7848 				queue_hotplug = true;
7849 				DRM_DEBUG("IH: HPD1\n");
7850 
7851 				break;
7852 			case 1:
7853 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7854 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7855 
7856 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7857 				queue_hotplug = true;
7858 				DRM_DEBUG("IH: HPD2\n");
7859 
7860 				break;
7861 			case 2:
7862 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7863 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7864 
7865 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7866 				queue_hotplug = true;
7867 				DRM_DEBUG("IH: HPD3\n");
7868 
7869 				break;
7870 			case 3:
7871 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7872 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7873 
7874 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7875 				queue_hotplug = true;
7876 				DRM_DEBUG("IH: HPD4\n");
7877 
7878 				break;
7879 			case 4:
7880 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7881 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7882 
7883 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7884 				queue_hotplug = true;
7885 				DRM_DEBUG("IH: HPD5\n");
7886 
7887 				break;
7888 			case 5:
7889 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7890 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7891 
7892 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7893 				queue_hotplug = true;
7894 				DRM_DEBUG("IH: HPD6\n");
7895 
7896 				break;
7897 			case 6:
7898 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7899 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7900 
7901 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7902 				queue_dp = true;
7903 				DRM_DEBUG("IH: HPD_RX 1\n");
7904 
7905 				break;
7906 			case 7:
7907 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7908 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7909 
7910 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7911 				queue_dp = true;
7912 				DRM_DEBUG("IH: HPD_RX 2\n");
7913 
7914 				break;
7915 			case 8:
7916 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7917 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7918 
7919 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7920 				queue_dp = true;
7921 				DRM_DEBUG("IH: HPD_RX 3\n");
7922 
7923 				break;
7924 			case 9:
7925 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7926 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7927 
7928 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7929 				queue_dp = true;
7930 				DRM_DEBUG("IH: HPD_RX 4\n");
7931 
7932 				break;
7933 			case 10:
7934 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7935 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7936 
7937 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7938 				queue_dp = true;
7939 				DRM_DEBUG("IH: HPD_RX 5\n");
7940 
7941 				break;
7942 			case 11:
7943 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7944 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7945 
7946 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7947 				queue_dp = true;
7948 				DRM_DEBUG("IH: HPD_RX 6\n");
7949 
7950 				break;
7951 			default:
7952 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7953 				break;
7954 			}
7955 			break;
7956 		case 96:
7957 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7958 			WREG32(SRBM_INT_ACK, 0x1);
7959 			break;
7960 		case 124: /* UVD */
7961 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7962 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7963 			break;
7964 		case 146:
7965 		case 147:
7966 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7967 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7968 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7969 			/* reset addr and status */
7970 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7971 			if (addr == 0x0 && status == 0x0)
7972 				break;
7973 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7974 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7975 				addr);
7976 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7977 				status);
7978 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7979 			break;
7980 		case 167: /* VCE */
7981 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7982 			switch (src_data) {
7983 			case 0:
7984 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7985 				break;
7986 			case 1:
7987 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7988 				break;
7989 			default:
7990 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7991 				break;
7992 			}
7993 			break;
7994 		case 176: /* GFX RB CP_INT */
7995 		case 177: /* GFX IB CP_INT */
7996 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7997 			break;
7998 		case 181: /* CP EOP event */
7999 			DRM_DEBUG("IH: CP EOP\n");
8000 			/* XXX check the bitfield order! */
8001 			me_id = (ring_id & 0x60) >> 5;
8002 			pipe_id = (ring_id & 0x18) >> 3;
8003 			queue_id = (ring_id & 0x7) >> 0;
8004 			switch (me_id) {
8005 			case 0:
8006 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8007 				break;
8008 			case 1:
8009 			case 2:
8010 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8011 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8012 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8013 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8014 				break;
8015 			}
8016 			break;
8017 		case 184: /* CP Privileged reg access */
8018 			DRM_ERROR("Illegal register access in command stream\n");
8019 			/* XXX check the bitfield order! */
8020 			me_id = (ring_id & 0x60) >> 5;
8021 			pipe_id = (ring_id & 0x18) >> 3;
8022 			queue_id = (ring_id & 0x7) >> 0;
8023 			switch (me_id) {
8024 			case 0:
8025 				/* This results in a full GPU reset, but all we need to do is soft
8026 				 * reset the CP for gfx
8027 				 */
8028 				queue_reset = true;
8029 				break;
8030 			case 1:
8031 				/* XXX compute */
8032 				queue_reset = true;
8033 				break;
8034 			case 2:
8035 				/* XXX compute */
8036 				queue_reset = true;
8037 				break;
8038 			}
8039 			break;
8040 		case 185: /* CP Privileged inst */
8041 			DRM_ERROR("Illegal instruction in command stream\n");
8042 			/* XXX check the bitfield order! */
8043 			me_id = (ring_id & 0x60) >> 5;
8044 			pipe_id = (ring_id & 0x18) >> 3;
8045 			queue_id = (ring_id & 0x7) >> 0;
8046 			switch (me_id) {
8047 			case 0:
8048 				/* This results in a full GPU reset, but all we need to do is soft
8049 				 * reset the CP for gfx
8050 				 */
8051 				queue_reset = true;
8052 				break;
8053 			case 1:
8054 				/* XXX compute */
8055 				queue_reset = true;
8056 				break;
8057 			case 2:
8058 				/* XXX compute */
8059 				queue_reset = true;
8060 				break;
8061 			}
8062 			break;
8063 		case 224: /* SDMA trap event */
8064 			/* XXX check the bitfield order! */
8065 			me_id = (ring_id & 0x3) >> 0;
8066 			queue_id = (ring_id & 0xc) >> 2;
8067 			DRM_DEBUG("IH: SDMA trap\n");
8068 			switch (me_id) {
8069 			case 0:
8070 				switch (queue_id) {
8071 				case 0:
8072 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8073 					break;
8074 				case 1:
8075 					/* XXX compute */
8076 					break;
8077 				case 2:
8078 					/* XXX compute */
8079 					break;
8080 				}
8081 				break;
8082 			case 1:
8083 				switch (queue_id) {
8084 				case 0:
8085 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8086 					break;
8087 				case 1:
8088 					/* XXX compute */
8089 					break;
8090 				case 2:
8091 					/* XXX compute */
8092 					break;
8093 				}
8094 				break;
8095 			}
8096 			break;
8097 		case 230: /* thermal low to high */
8098 			DRM_DEBUG("IH: thermal low to high\n");
8099 			rdev->pm.dpm.thermal.high_to_low = false;
8100 			queue_thermal = true;
8101 			break;
8102 		case 231: /* thermal high to low */
8103 			DRM_DEBUG("IH: thermal high to low\n");
8104 			rdev->pm.dpm.thermal.high_to_low = true;
8105 			queue_thermal = true;
8106 			break;
8107 		case 233: /* GUI IDLE */
8108 			DRM_DEBUG("IH: GUI idle\n");
8109 			break;
8110 		case 241: /* SDMA Privileged inst */
8111 		case 247: /* SDMA Privileged inst */
8112 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8113 			/* XXX check the bitfield order! */
8114 			me_id = (ring_id & 0x3) >> 0;
8115 			queue_id = (ring_id & 0xc) >> 2;
8116 			switch (me_id) {
8117 			case 0:
8118 				switch (queue_id) {
8119 				case 0:
8120 					queue_reset = true;
8121 					break;
8122 				case 1:
8123 					/* XXX compute */
8124 					queue_reset = true;
8125 					break;
8126 				case 2:
8127 					/* XXX compute */
8128 					queue_reset = true;
8129 					break;
8130 				}
8131 				break;
8132 			case 1:
8133 				switch (queue_id) {
8134 				case 0:
8135 					queue_reset = true;
8136 					break;
8137 				case 1:
8138 					/* XXX compute */
8139 					queue_reset = true;
8140 					break;
8141 				case 2:
8142 					/* XXX compute */
8143 					queue_reset = true;
8144 					break;
8145 				}
8146 				break;
8147 			}
8148 			break;
8149 		default:
8150 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8151 			break;
8152 		}
8153 
8154 		/* wptr/rptr are in bytes! */
8155 		rptr += 16;
8156 		rptr &= rdev->ih.ptr_mask;
8157 		WREG32(IH_RB_RPTR, rptr);
8158 	}
8159 	if (queue_dp)
8160 		schedule_work(&rdev->dp_work);
8161 	if (queue_hotplug)
8162 		schedule_delayed_work(&rdev->hotplug_work, 0);
8163 	if (queue_reset) {
8164 #ifdef __NetBSD__
8165 		spin_lock(&rdev->fence_lock);
8166 		rdev->needs_reset = true;
8167 		radeon_fence_wakeup_locked(rdev);
8168 		spin_unlock(&rdev->fence_lock);
8169 #else
8170 		rdev->needs_reset = true;
8171 		wake_up_all(&rdev->fence_queue);
8172 #endif
8173 	}
8174 	if (queue_thermal)
8175 		schedule_work(&rdev->pm.dpm.thermal.work);
8176 	rdev->ih.rptr = rptr;
8177 	atomic_set(&rdev->ih.lock, 0);
8178 
8179 	/* make sure wptr hasn't changed while processing */
8180 	wptr = cik_get_ih_wptr(rdev);
8181 	if (wptr != rptr)
8182 		goto restart_ih;
8183 
8184 	return IRQ_HANDLED;
8185 }
8186 
8187 /*
8188  * startup/shutdown callbacks
8189  */
cik_uvd_init(struct radeon_device * rdev)8190 static void cik_uvd_init(struct radeon_device *rdev)
8191 {
8192 	int r;
8193 
8194 	if (!rdev->has_uvd)
8195 		return;
8196 
8197 	r = radeon_uvd_init(rdev);
8198 	if (r) {
8199 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8200 		/*
8201 		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8202 		 * to early fails cik_uvd_start() and thus nothing happens
8203 		 * there. So it is pointless to try to go through that code
8204 		 * hence why we disable uvd here.
8205 		 */
8206 		rdev->has_uvd = false;
8207 		return;
8208 	}
8209 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8210 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8211 }
8212 
cik_uvd_start(struct radeon_device * rdev)8213 static void cik_uvd_start(struct radeon_device *rdev)
8214 {
8215 	int r;
8216 
8217 	if (!rdev->has_uvd)
8218 		return;
8219 
8220 	r = radeon_uvd_resume(rdev);
8221 	if (r) {
8222 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8223 		goto error;
8224 	}
8225 	r = uvd_v4_2_resume(rdev);
8226 	if (r) {
8227 		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8228 		goto error;
8229 	}
8230 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8231 	if (r) {
8232 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8233 		goto error;
8234 	}
8235 	return;
8236 
8237 error:
8238 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8239 }
8240 
cik_uvd_resume(struct radeon_device * rdev)8241 static void cik_uvd_resume(struct radeon_device *rdev)
8242 {
8243 	struct radeon_ring *ring;
8244 	int r;
8245 
8246 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8247 		return;
8248 
8249 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8250 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8251 	if (r) {
8252 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8253 		return;
8254 	}
8255 	r = uvd_v1_0_init(rdev);
8256 	if (r) {
8257 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8258 		return;
8259 	}
8260 }
8261 
cik_vce_init(struct radeon_device * rdev)8262 static void cik_vce_init(struct radeon_device *rdev)
8263 {
8264 	int r;
8265 
8266 	if (!rdev->has_vce)
8267 		return;
8268 
8269 	r = radeon_vce_init(rdev);
8270 	if (r) {
8271 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8272 		/*
8273 		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8274 		 * to early fails cik_vce_start() and thus nothing happens
8275 		 * there. So it is pointless to try to go through that code
8276 		 * hence why we disable vce here.
8277 		 */
8278 		rdev->has_vce = false;
8279 		return;
8280 	}
8281 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8282 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8283 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8284 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8285 }
8286 
cik_vce_start(struct radeon_device * rdev)8287 static void cik_vce_start(struct radeon_device *rdev)
8288 {
8289 	int r;
8290 
8291 	if (!rdev->has_vce)
8292 		return;
8293 
8294 	r = radeon_vce_resume(rdev);
8295 	if (r) {
8296 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8297 		goto error;
8298 	}
8299 	r = vce_v2_0_resume(rdev);
8300 	if (r) {
8301 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8302 		goto error;
8303 	}
8304 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8305 	if (r) {
8306 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8307 		goto error;
8308 	}
8309 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8310 	if (r) {
8311 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8312 		goto error;
8313 	}
8314 	return;
8315 
8316 error:
8317 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8318 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8319 }
8320 
cik_vce_resume(struct radeon_device * rdev)8321 static void cik_vce_resume(struct radeon_device *rdev)
8322 {
8323 	struct radeon_ring *ring;
8324 	int r;
8325 
8326 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8327 		return;
8328 
8329 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8330 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8331 	if (r) {
8332 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8333 		return;
8334 	}
8335 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8336 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8337 	if (r) {
8338 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8339 		return;
8340 	}
8341 	r = vce_v1_0_init(rdev);
8342 	if (r) {
8343 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8344 		return;
8345 	}
8346 }
8347 
8348 /**
8349  * cik_startup - program the asic to a functional state
8350  *
8351  * @rdev: radeon_device pointer
8352  *
8353  * Programs the asic to a functional state (CIK).
8354  * Called by cik_init() and cik_resume().
8355  * Returns 0 for success, error for failure.
8356  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPU only: load the MC firmware, unless DPM is already enabled */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* IGPs: pick the RLC save/restore list for the chip family */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = round_up(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* start the fence driver on all five rings: GFX, two compute
	 * (CP1/CP2), and two SDMA engines */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD/VCE start; failures there are non-fatal (they disable the
	 * respective ring instead of failing startup) */
	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	/* Hawaii with old firmware must use type-2 NOPs on the GFX ring;
	 * everything else uses type-3 NOPs */
	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
8549 
8550 /**
8551  * cik_resume - resume the asic to a functional state
8552  *
8553  * @rdev: radeon_device pointer
8554  *
8555  * Programs the asic to a functional state (CIK).
8556  * Called at resume.
8557  * Returns 0 for success, error for failure.
8558  */
cik_resume(struct radeon_device * rdev)8559 int cik_resume(struct radeon_device *rdev)
8560 {
8561 	int r;
8562 
8563 	/* post card */
8564 	atom_asic_init(rdev->mode_info.atom_context);
8565 
8566 	/* init golden registers */
8567 	cik_init_golden_registers(rdev);
8568 
8569 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8570 		radeon_pm_resume(rdev);
8571 
8572 	rdev->accel_working = true;
8573 	r = cik_startup(rdev);
8574 	if (r) {
8575 		DRM_ERROR("cik startup failed on resume\n");
8576 		rdev->accel_working = false;
8577 		return r;
8578 	}
8579 
8580 	return r;
8581 
8582 }
8583 
8584 /**
8585  * cik_suspend - suspend the asic
8586  *
8587  * @rdev: radeon_device pointer
8588  *
8589  * Bring the chip into a state suitable for suspend (CIK).
8590  * Called at suspend.
8591  * Returns 0 for success.
8592  */
int cik_suspend(struct radeon_device *rdev)
{
	/* stop power management and audio first */
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* halt the command processor and both SDMA engines */
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	/* quiesce UVD/VCE if the hardware has them */
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	/* tear down powergating/clockgating before masking interrupts */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	/* finally disable writeback and the GART */
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8613 
8614 /* Plan is to move initialization in that function and use
8615  * helper function so that radeon_device_init pretty much
8616  * do nothing more than calling asic specific function. This
8617  * should also allow to remove a bunch of callback function
8618  * like vram_info.
8619  */
8620 /**
8621  * cik_init - asic specific driver and hw init
8622  *
8623  * @rdev: radeon_device pointer
8624  *
8625  * Setup asic specific driver variables and program the hw
8626  * to a functional state (CIK).
8627  * Called at driver startup.
8628  * Returns 0 for success, errors for failure.
8629  */
int cik_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		/* NOTE(review): message says "cayman" but this is the CIK
		 * init path - looks like copy/paste from ni.c; confirm
		 * before reusing this text elsewhere. */
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	cik_init_golden_registers(rdev);
	/* Initialize scratch registers */
	cik_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = cik_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* load microcode if any piece is missing; IGPs do not need the
	 * MC firmware, dGPUs additionally require mc_fw */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	}

	/* Initialize power management */
	radeon_pm_init(rdev);

	/* describe the GFX ring and the two compute rings; compute rings
	 * also need a doorbell each */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	/* two SDMA rings */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	cik_uvd_init(rdev);
	cik_vce_init(rdev);

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		/* startup failure is not fatal for cik_init: tear down
		 * acceleration and continue without it */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cik_cp_fini(rdev);
		cik_sdma_fini(rdev);
		cik_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		cik_mec_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cik_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not suffient for advanced operations.
	 *
	 * NOTE(review): the error text says "NI+" although this is the
	 * CIK path - presumably carried over from ni.c; verify.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
8769 
8770 /**
8771  * cik_fini - asic specific driver and hw fini
8772  *
8773  * @rdev: radeon_device pointer
8774  *
8775  * Tear down the asic specific driver variables and program the hw
8776  * to an idle state (CIK).
8777  * Called at driver unload.
8778  */
void cik_fini(struct radeon_device *rdev)
{
	/* stop power management, then tear down in roughly the reverse
	 * order of cik_init()/cik_startup() */
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	/* UVD/VCE teardown is unconditional here (no has_uvd/has_vce
	 * guard, unlike cik_suspend()) */
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	/* BIOS copy was kmalloc'd in radeon_get_bios() */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
8805 
dce8_program_fmt(struct drm_encoder * encoder)8806 void dce8_program_fmt(struct drm_encoder *encoder)
8807 {
8808 	struct drm_device *dev = encoder->dev;
8809 	struct radeon_device *rdev = dev->dev_private;
8810 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8811 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8812 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8813 	int bpc = 0;
8814 	u32 tmp = 0;
8815 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8816 
8817 	if (connector) {
8818 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8819 		bpc = radeon_get_monitor_bpc(connector);
8820 		dither = radeon_connector->dither;
8821 	}
8822 
8823 	/* LVDS/eDP FMT is set up by atom */
8824 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8825 		return;
8826 
8827 	/* not needed for analog */
8828 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8829 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8830 		return;
8831 
8832 	if (bpc == 0)
8833 		return;
8834 
8835 	switch (bpc) {
8836 	case 6:
8837 		if (dither == RADEON_FMT_DITHER_ENABLE)
8838 			/* XXX sort out optimal dither settings */
8839 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8840 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8841 		else
8842 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8843 		break;
8844 	case 8:
8845 		if (dither == RADEON_FMT_DITHER_ENABLE)
8846 			/* XXX sort out optimal dither settings */
8847 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8848 				FMT_RGB_RANDOM_ENABLE |
8849 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8850 		else
8851 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8852 		break;
8853 	case 10:
8854 		if (dither == RADEON_FMT_DITHER_ENABLE)
8855 			/* XXX sort out optimal dither settings */
8856 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8857 				FMT_RGB_RANDOM_ENABLE |
8858 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8859 		else
8860 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8861 		break;
8862 	default:
8863 		/* not needed */
8864 		break;
8865 	}
8866 
8867 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8868 }
8869 
8870 /* display watermark setup */
8871 /**
8872  * dce8_line_buffer_adjust - Set up the line buffer
8873  *
8874  * @rdev: radeon_device pointer
8875  * @radeon_crtc: the selected display controller
8876  * @mode: the current display mode on the selected display
8877  * controller
8878  *
8879  * Setup up the line buffer allocation for
8880  * the selected display controller (CIK).
8881  * Returns the line buffer size in pixels.
8882  */
dce8_line_buffer_adjust(struct radeon_device * rdev,struct radeon_crtc * radeon_crtc,struct drm_display_mode * mode)8883 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8884 				   struct radeon_crtc *radeon_crtc,
8885 				   struct drm_display_mode *mode)
8886 {
8887 	u32 tmp, buffer_alloc, i;
8888 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8889 	/*
8890 	 * Line Buffer Setup
8891 	 * There are 6 line buffers, one for each display controllers.
8892 	 * There are 3 partitions per LB. Select the number of partitions
8893 	 * to enable based on the display width.  For display widths larger
8894 	 * than 4096, you need use to use 2 display controllers and combine
8895 	 * them using the stereo blender.
8896 	 */
8897 	if (radeon_crtc->base.enabled && mode) {
8898 		if (mode->crtc_hdisplay < 1920) {
8899 			tmp = 1;
8900 			buffer_alloc = 2;
8901 		} else if (mode->crtc_hdisplay < 2560) {
8902 			tmp = 2;
8903 			buffer_alloc = 2;
8904 		} else if (mode->crtc_hdisplay < 4096) {
8905 			tmp = 0;
8906 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8907 		} else {
8908 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8909 			tmp = 0;
8910 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8911 		}
8912 	} else {
8913 		tmp = 1;
8914 		buffer_alloc = 0;
8915 	}
8916 
8917 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8918 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8919 
8920 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8921 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8922 	for (i = 0; i < rdev->usec_timeout; i++) {
8923 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8924 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8925 			break;
8926 		udelay(1);
8927 	}
8928 
8929 	if (radeon_crtc->base.enabled && mode) {
8930 		switch (tmp) {
8931 		case 0:
8932 		default:
8933 			return 4096 * 2;
8934 		case 1:
8935 			return 1920 * 2;
8936 		case 2:
8937 			return 2560 * 2;
8938 		}
8939 	}
8940 
8941 	/* controller not enabled, so no lb used */
8942 	return 0;
8943 }
8944 
8945 /**
8946  * cik_get_number_of_dram_channels - get the number of dram channels
8947  *
8948  * @rdev: radeon_device pointer
8949  *
8950  * Look up the number of video ram channels (CIK).
8951  * Used for display watermark bandwidth calculations
8952  * Returns the number of dram channels
8953  */
cik_get_number_of_dram_channels(struct radeon_device * rdev)8954 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8955 {
8956 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8957 
8958 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8959 	case 0:
8960 	default:
8961 		return 1;
8962 	case 1:
8963 		return 2;
8964 	case 2:
8965 		return 4;
8966 	case 3:
8967 		return 8;
8968 	case 4:
8969 		return 3;
8970 	case 5:
8971 		return 6;
8972 	case 6:
8973 		return 10;
8974 	case 7:
8975 		return 12;
8976 	case 8:
8977 		return 16;
8978 	}
8979 }
8980 
/* Input parameters for the dce8_* display watermark helpers below. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8996 
8997 /**
8998  * dce8_dram_bandwidth - get the dram bandwidth
8999  *
9000  * @wm: watermark calculation data
9001  *
9002  * Calculate the raw dram bandwidth (CIK).
9003  * Used for display watermark bandwidth calculations
9004  * Returns the dram bandwidth in MBytes/s
9005  */
dce8_dram_bandwidth(struct dce8_wm_params * wm)9006 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9007 {
9008 	/* Calculate raw DRAM Bandwidth */
9009 	fixed20_12 dram_efficiency; /* 0.7 */
9010 	fixed20_12 yclk, dram_channels, bandwidth;
9011 	fixed20_12 a;
9012 
9013 	a.full = dfixed_const(1000);
9014 	yclk.full = dfixed_const(wm->yclk);
9015 	yclk.full = dfixed_div(yclk, a);
9016 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9017 	a.full = dfixed_const(10);
9018 	dram_efficiency.full = dfixed_const(7);
9019 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9020 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9021 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9022 
9023 	return dfixed_trunc(bandwidth);
9024 }
9025 
9026 /**
9027  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9028  *
9029  * @wm: watermark calculation data
9030  *
9031  * Calculate the dram bandwidth used for display (CIK).
9032  * Used for display watermark bandwidth calculations
9033  * Returns the dram bandwidth for display in MBytes/s
9034  */
dce8_dram_bandwidth_for_display(struct dce8_wm_params * wm)9035 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9036 {
9037 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9038 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9039 	fixed20_12 yclk, dram_channels, bandwidth;
9040 	fixed20_12 a;
9041 
9042 	a.full = dfixed_const(1000);
9043 	yclk.full = dfixed_const(wm->yclk);
9044 	yclk.full = dfixed_div(yclk, a);
9045 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9046 	a.full = dfixed_const(10);
9047 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9048 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9049 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9050 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9051 
9052 	return dfixed_trunc(bandwidth);
9053 }
9054 
9055 /**
9056  * dce8_data_return_bandwidth - get the data return bandwidth
9057  *
9058  * @wm: watermark calculation data
9059  *
9060  * Calculate the data return bandwidth used for display (CIK).
9061  * Used for display watermark bandwidth calculations
9062  * Returns the data return bandwidth in MBytes/s
9063  */
dce8_data_return_bandwidth(struct dce8_wm_params * wm)9064 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9065 {
9066 	/* Calculate the display Data return Bandwidth */
9067 	fixed20_12 return_efficiency; /* 0.8 */
9068 	fixed20_12 sclk, bandwidth;
9069 	fixed20_12 a;
9070 
9071 	a.full = dfixed_const(1000);
9072 	sclk.full = dfixed_const(wm->sclk);
9073 	sclk.full = dfixed_div(sclk, a);
9074 	a.full = dfixed_const(10);
9075 	return_efficiency.full = dfixed_const(8);
9076 	return_efficiency.full = dfixed_div(return_efficiency, a);
9077 	a.full = dfixed_const(32);
9078 	bandwidth.full = dfixed_mul(a, sclk);
9079 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9080 
9081 	return dfixed_trunc(bandwidth);
9082 }
9083 
9084 /**
9085  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9086  *
9087  * @wm: watermark calculation data
9088  *
9089  * Calculate the dmif bandwidth used for display (CIK).
9090  * Used for display watermark bandwidth calculations
9091  * Returns the dmif bandwidth in MBytes/s
9092  */
dce8_dmif_request_bandwidth(struct dce8_wm_params * wm)9093 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9094 {
9095 	/* Calculate the DMIF Request Bandwidth */
9096 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9097 	fixed20_12 disp_clk, bandwidth;
9098 	fixed20_12 a, b;
9099 
9100 	a.full = dfixed_const(1000);
9101 	disp_clk.full = dfixed_const(wm->disp_clk);
9102 	disp_clk.full = dfixed_div(disp_clk, a);
9103 	a.full = dfixed_const(32);
9104 	b.full = dfixed_mul(a, disp_clk);
9105 
9106 	a.full = dfixed_const(10);
9107 	disp_clk_request_efficiency.full = dfixed_const(8);
9108 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9109 
9110 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9111 
9112 	return dfixed_trunc(bandwidth);
9113 }
9114 
9115 /**
9116  * dce8_available_bandwidth - get the min available bandwidth
9117  *
9118  * @wm: watermark calculation data
9119  *
9120  * Calculate the min available bandwidth used for display (CIK).
9121  * Used for display watermark bandwidth calculations
9122  * Returns the min available bandwidth in MBytes/s
9123  */
dce8_available_bandwidth(struct dce8_wm_params * wm)9124 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9125 {
9126 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9127 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9128 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9129 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9130 
9131 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9132 }
9133 
9134 /**
9135  * dce8_average_bandwidth - get the average available bandwidth
9136  *
9137  * @wm: watermark calculation data
9138  *
9139  * Calculate the average available bandwidth used for display (CIK).
9140  * Used for display watermark bandwidth calculations
9141  * Returns the average available bandwidth in MBytes/s
9142  */
dce8_average_bandwidth(struct dce8_wm_params * wm)9143 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9144 {
9145 	/* Calculate the display mode Average Bandwidth
9146 	 * DisplayMode should contain the source and destination dimensions,
9147 	 * timing, etc.
9148 	 */
9149 	fixed20_12 bpp;
9150 	fixed20_12 line_time;
9151 	fixed20_12 src_width;
9152 	fixed20_12 bandwidth;
9153 	fixed20_12 a;
9154 
9155 	a.full = dfixed_const(1000);
9156 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9157 	line_time.full = dfixed_div(line_time, a);
9158 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9159 	src_width.full = dfixed_const(wm->src_width);
9160 	bandwidth.full = dfixed_mul(src_width, bpp);
9161 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9162 	bandwidth.full = dfixed_div(bandwidth, line_time);
9163 
9164 	return dfixed_trunc(bandwidth);
9165 }
9166 
9167 /**
9168  * dce8_latency_watermark - get the latency watermark
9169  *
9170  * @wm: watermark calculation data
9171  *
9172  * Calculate the latency watermark (CIK).
9173  * Used for display watermark bandwidth calculations
9174  * Returns the latency watermark in ns
9175  */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	/* time for a worst-case 512-byte chunk / 128-byte cursor pair to
	 * return at the available bandwidth (ns) */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* data for the other heads ahead of us in the return queue */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* downscaling / interlacing can need up to 4 source lines per
	 * destination line; otherwise 2 suffice */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* line-buffer fill rate: limited by this head's share of the
	 * available bandwidth, the DMIF buffer drain rate, and the
	 * display pixel rate */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);
	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
	tmp = min(dfixed_trunc(a), tmp);

	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);

	/* time to refill the line buffer for one destination line (ns) */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if refilling takes longer than the active time, pad the
	 * watermark by the overrun */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
9225 
9226 /**
9227  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9228  * average and available dram bandwidth
9229  *
9230  * @wm: watermark calculation data
9231  *
9232  * Check if the display average bandwidth fits in the display
9233  * dram bandwidth (CIK).
9234  * Used for display watermark bandwidth calculations
9235  * Returns true if the display fits, false if not.
9236  */
dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params * wm)9237 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9238 {
9239 	if (dce8_average_bandwidth(wm) <=
9240 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9241 		return true;
9242 	else
9243 		return false;
9244 }
9245 
9246 /**
9247  * dce8_average_bandwidth_vs_available_bandwidth - check
9248  * average and available bandwidth
9249  *
9250  * @wm: watermark calculation data
9251  *
9252  * Check if the display average bandwidth fits in the display
9253  * available bandwidth (CIK).
9254  * Used for display watermark bandwidth calculations
9255  * Returns true if the display fits, false if not.
9256  */
dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params * wm)9257 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9258 {
9259 	if (dce8_average_bandwidth(wm) <=
9260 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9261 		return true;
9262 	else
9263 		return false;
9264 }
9265 
9266 /**
9267  * dce8_check_latency_hiding - check latency hiding
9268  *
9269  * @wm: watermark calculation data
9270  *
9271  * Check latency hiding (CIK).
9272  * Used for display watermark bandwidth calculations
9273  * Returns true if the display fits, false if not.
9274  */
dce8_check_latency_hiding(struct dce8_wm_params * wm)9275 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9276 {
9277 	u32 lb_partitions = wm->lb_size / wm->src_width;
9278 	u32 line_time = wm->active_time + wm->blank_time;
9279 	u32 latency_tolerant_lines;
9280 	u32 latency_hiding;
9281 	fixed20_12 a;
9282 
9283 	a.full = dfixed_const(1);
9284 	if (wm->vsc.full > a.full)
9285 		latency_tolerant_lines = 1;
9286 	else {
9287 		if (lb_partitions <= (wm->vtaps + 1))
9288 			latency_tolerant_lines = 1;
9289 		else
9290 			latency_tolerant_lines = 2;
9291 	}
9292 
9293 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9294 
9295 	if (dce8_latency_watermark(wm) <= latency_hiding)
9296 		return true;
9297 	else
9298 		return false;
9299 }
9300 
9301 /**
9302  * dce8_program_watermarks - program display watermarks
9303  *
9304  * @rdev: radeon_device pointer
9305  * @radeon_crtc: the selected display controller
9306  * @lb_size: line buffer size
9307  * @num_heads: number of display controllers in use
9308  *
9309  * Calculate and program the display watermarks for the
9310  * selected display controller (CIK).
9311  */
dce8_program_watermarks(struct radeon_device * rdev,struct radeon_crtc * radeon_crtc,u32 lb_size,u32 num_heads)9312 static void dce8_program_watermarks(struct radeon_device *rdev,
9313 				    struct radeon_crtc *radeon_crtc,
9314 				    u32 lb_size, u32 num_heads)
9315 {
9316 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9317 	struct dce8_wm_params wm_low, wm_high;
9318 	u32 active_time;
9319 	u32 line_time = 0;
9320 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9321 	u32 tmp, wm_mask;
9322 
9323 	if (radeon_crtc->base.enabled && num_heads && mode) {
9324 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9325 					    (u32)mode->clock);
9326 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9327 					  (u32)mode->clock);
9328 		line_time = min(line_time, (u32)65535);
9329 
9330 		/* watermark for high clocks */
9331 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9332 		    rdev->pm.dpm_enabled) {
9333 			wm_high.yclk =
9334 				radeon_dpm_get_mclk(rdev, false) * 10;
9335 			wm_high.sclk =
9336 				radeon_dpm_get_sclk(rdev, false) * 10;
9337 		} else {
9338 			wm_high.yclk = rdev->pm.current_mclk * 10;
9339 			wm_high.sclk = rdev->pm.current_sclk * 10;
9340 		}
9341 
9342 		wm_high.disp_clk = mode->clock;
9343 		wm_high.src_width = mode->crtc_hdisplay;
9344 		wm_high.active_time = active_time;
9345 		wm_high.blank_time = line_time - wm_high.active_time;
9346 		wm_high.interlaced = false;
9347 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9348 			wm_high.interlaced = true;
9349 		wm_high.vsc = radeon_crtc->vsc;
9350 		wm_high.vtaps = 1;
9351 		if (radeon_crtc->rmx_type != RMX_OFF)
9352 			wm_high.vtaps = 2;
9353 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9354 		wm_high.lb_size = lb_size;
9355 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9356 		wm_high.num_heads = num_heads;
9357 
9358 		/* set for high clocks */
9359 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9360 
9361 		/* possibly force display priority to high */
9362 		/* should really do this at mode validation time... */
9363 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9364 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9365 		    !dce8_check_latency_hiding(&wm_high) ||
9366 		    (rdev->disp_priority == 2)) {
9367 			DRM_DEBUG_KMS("force priority to high\n");
9368 		}
9369 
9370 		/* watermark for low clocks */
9371 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9372 		    rdev->pm.dpm_enabled) {
9373 			wm_low.yclk =
9374 				radeon_dpm_get_mclk(rdev, true) * 10;
9375 			wm_low.sclk =
9376 				radeon_dpm_get_sclk(rdev, true) * 10;
9377 		} else {
9378 			wm_low.yclk = rdev->pm.current_mclk * 10;
9379 			wm_low.sclk = rdev->pm.current_sclk * 10;
9380 		}
9381 
9382 		wm_low.disp_clk = mode->clock;
9383 		wm_low.src_width = mode->crtc_hdisplay;
9384 		wm_low.active_time = active_time;
9385 		wm_low.blank_time = line_time - wm_low.active_time;
9386 		wm_low.interlaced = false;
9387 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9388 			wm_low.interlaced = true;
9389 		wm_low.vsc = radeon_crtc->vsc;
9390 		wm_low.vtaps = 1;
9391 		if (radeon_crtc->rmx_type != RMX_OFF)
9392 			wm_low.vtaps = 2;
9393 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9394 		wm_low.lb_size = lb_size;
9395 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9396 		wm_low.num_heads = num_heads;
9397 
9398 		/* set for low clocks */
9399 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9400 
9401 		/* possibly force display priority to high */
9402 		/* should really do this at mode validation time... */
9403 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9404 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9405 		    !dce8_check_latency_hiding(&wm_low) ||
9406 		    (rdev->disp_priority == 2)) {
9407 			DRM_DEBUG_KMS("force priority to high\n");
9408 		}
9409 
9410 		/* Save number of lines the linebuffer leads before the scanout */
9411 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9412 	}
9413 
9414 	/* select wm A */
9415 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9416 	tmp = wm_mask;
9417 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9418 	tmp |= LATENCY_WATERMARK_MASK(1);
9419 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9420 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9421 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9422 		LATENCY_HIGH_WATERMARK(line_time)));
9423 	/* select wm B */
9424 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9425 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9426 	tmp |= LATENCY_WATERMARK_MASK(2);
9427 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9428 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9429 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9430 		LATENCY_HIGH_WATERMARK(line_time)));
9431 	/* restore original selection */
9432 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9433 
9434 	/* save values for DPM */
9435 	radeon_crtc->line_time = line_time;
9436 	radeon_crtc->wm_high = latency_watermark_a;
9437 	radeon_crtc->wm_low = latency_watermark_b;
9438 }
9439 
9440 /**
9441  * dce8_bandwidth_update - program display watermarks
9442  *
9443  * @rdev: radeon_device pointer
9444  *
9445  * Calculate and program the display watermarks and line
9446  * buffer allocation (CIK).
9447  */
dce8_bandwidth_update(struct radeon_device * rdev)9448 void dce8_bandwidth_update(struct radeon_device *rdev)
9449 {
9450 	struct drm_display_mode *mode = NULL;
9451 	u32 num_heads = 0, lb_size;
9452 	int i;
9453 
9454 	if (!rdev->mode_info.mode_config_initialized)
9455 		return;
9456 
9457 	radeon_update_display_priority(rdev);
9458 
9459 	for (i = 0; i < rdev->num_crtc; i++) {
9460 		if (rdev->mode_info.crtcs[i]->base.enabled)
9461 			num_heads++;
9462 	}
9463 	for (i = 0; i < rdev->num_crtc; i++) {
9464 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9465 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9466 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9467 	}
9468 }
9469 
9470 /**
9471  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9472  *
9473  * @rdev: radeon_device pointer
9474  *
9475  * Fetches a GPU clock counter snapshot (SI).
9476  * Returns the 64 bit clock counter snapshot.
9477  */
cik_get_gpu_clock_counter(struct radeon_device * rdev)9478 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9479 {
9480 	uint64_t clock;
9481 
9482 	mutex_lock(&rdev->gpu_clock_mutex);
9483 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9484 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9485 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9486 	mutex_unlock(&rdev->gpu_clock_mutex);
9487 	return clock;
9488 }
9489 
cik_set_uvd_clock(struct radeon_device * rdev,u32 clock,u32 cntl_reg,u32 status_reg)9490 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9491 			     u32 cntl_reg, u32 status_reg)
9492 {
9493 	int r, i;
9494 	struct atom_clock_dividers dividers;
9495 	uint32_t tmp;
9496 
9497 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9498 					   clock, false, &dividers);
9499 	if (r)
9500 		return r;
9501 
9502 	tmp = RREG32_SMC(cntl_reg);
9503 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9504 	tmp |= dividers.post_divider;
9505 	WREG32_SMC(cntl_reg, tmp);
9506 
9507 	for (i = 0; i < 100; i++) {
9508 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9509 			break;
9510 		mdelay(10);
9511 	}
9512 	if (i == 100)
9513 		return -ETIMEDOUT;
9514 
9515 	return 0;
9516 }
9517 
cik_set_uvd_clocks(struct radeon_device * rdev,u32 vclk,u32 dclk)9518 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9519 {
9520 	int r = 0;
9521 
9522 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9523 	if (r)
9524 		return r;
9525 
9526 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9527 	return r;
9528 }
9529 
cik_set_vce_clocks(struct radeon_device * rdev,u32 evclk,u32 ecclk)9530 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9531 {
9532 	int r, i;
9533 	struct atom_clock_dividers dividers;
9534 	u32 tmp;
9535 
9536 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9537 					   ecclk, false, &dividers);
9538 	if (r)
9539 		return r;
9540 
9541 	for (i = 0; i < 100; i++) {
9542 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9543 			break;
9544 		mdelay(10);
9545 	}
9546 	if (i == 100)
9547 		return -ETIMEDOUT;
9548 
9549 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9550 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9551 	tmp |= dividers.post_divider;
9552 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9553 
9554 	for (i = 0; i < 100; i++) {
9555 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9556 			break;
9557 		mdelay(10);
9558 	}
9559 	if (i == 100)
9560 		return -ETIMEDOUT;
9561 
9562 	return 0;
9563 }
9564 
cik_pcie_gen3_enable(struct radeon_device * rdev)9565 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9566 {
9567 	struct pci_dev *root = rdev->pdev->bus->self;
9568 	enum pci_bus_speed speed_cap;
9569 	u32 speed_cntl, current_data_rate;
9570 	int i;
9571 	u16 tmp16;
9572 
9573 	if (pci_is_root_bus(rdev->pdev->bus))
9574 		return;
9575 
9576 	if (radeon_pcie_gen2 == 0)
9577 		return;
9578 
9579 	if (rdev->flags & RADEON_IS_IGP)
9580 		return;
9581 
9582 	if (!(rdev->flags & RADEON_IS_PCIE))
9583 		return;
9584 
9585 	speed_cap = pcie_get_speed_cap(root);
9586 	if (speed_cap == PCI_SPEED_UNKNOWN)
9587 		return;
9588 
9589 	if ((speed_cap != PCIE_SPEED_8_0GT) &&
9590 	    (speed_cap != PCIE_SPEED_5_0GT))
9591 		return;
9592 
9593 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9594 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9595 		LC_CURRENT_DATA_RATE_SHIFT;
9596 	if (speed_cap == PCIE_SPEED_8_0GT) {
9597 		if (current_data_rate == 2) {
9598 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9599 			return;
9600 		}
9601 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9602 	} else if (speed_cap == PCIE_SPEED_5_0GT) {
9603 		if (current_data_rate == 1) {
9604 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9605 			return;
9606 		}
9607 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9608 	}
9609 
9610 	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
9611 		return;
9612 
9613 	if (speed_cap == PCIE_SPEED_8_0GT) {
9614 		/* re-try equalization if gen3 is not already enabled */
9615 		if (current_data_rate != 2) {
9616 			u16 bridge_cfg, gpu_cfg;
9617 			u16 bridge_cfg2, gpu_cfg2;
9618 			u32 max_lw, current_lw, tmp;
9619 
9620 			pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9621 						  &bridge_cfg);
9622 			pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
9623 						  &gpu_cfg);
9624 
9625 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9626 			pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
9627 
9628 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9629 			pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
9630 						   tmp16);
9631 
9632 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9633 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9634 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9635 
9636 			if (current_lw < max_lw) {
9637 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9638 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9639 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9640 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9641 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9642 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9643 				}
9644 			}
9645 
9646 			for (i = 0; i < 10; i++) {
9647 				/* check status */
9648 				pcie_capability_read_word(rdev->pdev,
9649 							  PCI_EXP_DEVSTA,
9650 							  &tmp16);
9651 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9652 					break;
9653 
9654 				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9655 							  &bridge_cfg);
9656 				pcie_capability_read_word(rdev->pdev,
9657 							  PCI_EXP_LNKCTL,
9658 							  &gpu_cfg);
9659 
9660 				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
9661 							  &bridge_cfg2);
9662 				pcie_capability_read_word(rdev->pdev,
9663 							  PCI_EXP_LNKCTL2,
9664 							  &gpu_cfg2);
9665 
9666 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9667 				tmp |= LC_SET_QUIESCE;
9668 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9669 
9670 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9671 				tmp |= LC_REDO_EQ;
9672 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9673 
9674 				msleep(100);
9675 
9676 				/* linkctl */
9677 				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9678 							  &tmp16);
9679 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9680 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9681 				pcie_capability_write_word(root, PCI_EXP_LNKCTL,
9682 							   tmp16);
9683 
9684 				pcie_capability_read_word(rdev->pdev,
9685 							  PCI_EXP_LNKCTL,
9686 							  &tmp16);
9687 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9688 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9689 				pcie_capability_write_word(rdev->pdev,
9690 							   PCI_EXP_LNKCTL,
9691 							   tmp16);
9692 
9693 				/* linkctl2 */
9694 				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
9695 							  &tmp16);
9696 				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
9697 					   PCI_EXP_LNKCTL2_TX_MARGIN);
9698 				tmp16 |= (bridge_cfg2 &
9699 					  (PCI_EXP_LNKCTL2_ENTER_COMP |
9700 					   PCI_EXP_LNKCTL2_TX_MARGIN));
9701 				pcie_capability_write_word(root,
9702 							   PCI_EXP_LNKCTL2,
9703 							   tmp16);
9704 
9705 				pcie_capability_read_word(rdev->pdev,
9706 							  PCI_EXP_LNKCTL2,
9707 							  &tmp16);
9708 				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
9709 					   PCI_EXP_LNKCTL2_TX_MARGIN);
9710 				tmp16 |= (gpu_cfg2 &
9711 					  (PCI_EXP_LNKCTL2_ENTER_COMP |
9712 					   PCI_EXP_LNKCTL2_TX_MARGIN));
9713 				pcie_capability_write_word(rdev->pdev,
9714 							   PCI_EXP_LNKCTL2,
9715 							   tmp16);
9716 
9717 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9718 				tmp &= ~LC_SET_QUIESCE;
9719 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9720 			}
9721 		}
9722 	}
9723 
9724 	/* set the link speed */
9725 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9726 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9727 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9728 
9729 	pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
9730 	tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
9731 	if (speed_cap == PCIE_SPEED_8_0GT)
9732 		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
9733 	else if (speed_cap == PCIE_SPEED_5_0GT)
9734 		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
9735 	else
9736 		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
9737 	pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
9738 
9739 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9740 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9741 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9742 
9743 	for (i = 0; i < rdev->usec_timeout; i++) {
9744 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9745 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9746 			break;
9747 		udelay(1);
9748 	}
9749 }
9750 
cik_program_aspm(struct radeon_device * rdev)9751 static void cik_program_aspm(struct radeon_device *rdev)
9752 {
9753 	u32 data, orig;
9754 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9755 	bool disable_clkreq = false;
9756 
9757 	if (radeon_aspm == 0)
9758 		return;
9759 
9760 	/* XXX double check IGPs */
9761 	if (rdev->flags & RADEON_IS_IGP)
9762 		return;
9763 
9764 	if (!(rdev->flags & RADEON_IS_PCIE))
9765 		return;
9766 
9767 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9768 	data &= ~LC_XMIT_N_FTS_MASK;
9769 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9770 	if (orig != data)
9771 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9772 
9773 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9774 	data |= LC_GO_TO_RECOVERY;
9775 	if (orig != data)
9776 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9777 
9778 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9779 	data |= P_IGNORE_EDB_ERR;
9780 	if (orig != data)
9781 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9782 
9783 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9784 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9785 	data |= LC_PMI_TO_L1_DIS;
9786 	if (!disable_l0s)
9787 		data |= LC_L0S_INACTIVITY(7);
9788 
9789 	if (!disable_l1) {
9790 		data |= LC_L1_INACTIVITY(7);
9791 		data &= ~LC_PMI_TO_L1_DIS;
9792 		if (orig != data)
9793 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9794 
9795 		if (!disable_plloff_in_l1) {
9796 			bool clk_req_support;
9797 
9798 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9799 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9800 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9801 			if (orig != data)
9802 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9803 
9804 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9805 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9806 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9807 			if (orig != data)
9808 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9809 
9810 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9811 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9812 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9813 			if (orig != data)
9814 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9815 
9816 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9817 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9818 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9819 			if (orig != data)
9820 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9821 
9822 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9823 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9824 			data |= LC_DYN_LANES_PWR_STATE(3);
9825 			if (orig != data)
9826 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9827 
9828 			if (!disable_clkreq &&
9829 			    !pci_is_root_bus(rdev->pdev->bus)) {
9830 #ifndef __NetBSD__		/* XXX radeon pcie */
9831 				struct pci_dev *root = rdev->pdev->bus->self;
9832 				u32 lnkcap;
9833 #endif
9834 
9835 				clk_req_support = false;
9836 #ifndef __NetBSD__		/* XXX radeon pcie */
9837 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9838 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9839 					clk_req_support = true;
9840 #endif
9841 			} else {
9842 				clk_req_support = false;
9843 			}
9844 
9845 			if (clk_req_support) {
9846 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9847 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9848 				if (orig != data)
9849 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9850 
9851 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9852 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9853 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9854 				if (orig != data)
9855 					WREG32_SMC(THM_CLK_CNTL, data);
9856 
9857 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9858 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9859 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9860 				if (orig != data)
9861 					WREG32_SMC(MISC_CLK_CTRL, data);
9862 
9863 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9864 				data &= ~BCLK_AS_XCLK;
9865 				if (orig != data)
9866 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9867 
9868 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9869 				data &= ~FORCE_BIF_REFCLK_EN;
9870 				if (orig != data)
9871 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9872 
9873 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9874 				data &= ~MPLL_CLKOUT_SEL_MASK;
9875 				data |= MPLL_CLKOUT_SEL(4);
9876 				if (orig != data)
9877 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9878 			}
9879 		}
9880 	} else {
9881 		if (orig != data)
9882 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9883 	}
9884 
9885 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9886 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9887 	if (orig != data)
9888 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9889 
9890 	if (!disable_l0s) {
9891 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9892 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9893 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9894 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9895 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9896 				data &= ~LC_L0S_INACTIVITY_MASK;
9897 				if (orig != data)
9898 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9899 			}
9900 		}
9901 	}
9902 }
9903