/*	$NetBSD: radeon_cik.c,v 1.7 2022/10/17 03:05:32 mrg Exp $	*/

/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: radeon_cik.c,v 1.7 2022/10/17 03:05:32 mrg Exp $");

#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>

#include <drm/drm_vblank.h>

#include "atom.h"
#include "cik_blit_shaders.h"
#include "cikd.h"
#include "clearstate_ci.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "radeon_ucode.h"

#define SH_MEM_CONFIG_GFX_DEFAULT \
	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)

#include <linux/nbsd-namespace.h>
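/*
 * Two firmware naming conventions exist for CIK parts: the original
 * uppercase names (e.g. BONAIRE_pfp.bin) and the newer lowercase names
 * (e.g. bonaire_pfp.bin) that carry the common ucode headers; the
 * microcode loader tries the new names first and falls back to the
 * legacy ones.
 */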
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");
MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");
MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 *
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

/*
 * Indirect registers accessor
 */
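/*
 * DIDT registers are not directly mapped; they are reached through the
 * CIK_DIDT_IND_INDEX/CIK_DIDT_IND_DATA pair.  The didt_idx_lock keeps
 * the index and data accesses of concurrent callers from interleaving.
 */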
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}

void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, (reg));
	WREG32(CIK_DIDT_IND_DATA, (v));
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}

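/*
 * The CTF (thermal) status field read below is a 9-bit value with an
 * overflow bit; when bit 9 is set the reading is clamped to the 255 degC
 * maximum.
 */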
/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	return actual_temp * 1000;
}

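/*
 * KV/KB expose the current temperature through a raw SMC register
 * (0xC0300E0C) rather than CG_MULT_THERMAL_STATUS; the raw value appears
 * to be in 1/8 degC steps with a 49 degC offset applied after scaling.
 */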
/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	return actual_temp * 1000;
}

/*
 * Indirect registers accessor
 */
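/*
 * PCIE port registers also go through an index/data pair.  The extra
 * reads of PCIE_INDEX/PCIE_DATA after each write read the register back
 * to make sure the posted write has landed before the access proceeds.
 */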
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}

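/*
 * RLC save/restore lists: each entry packs a GRBM_GFX_INDEX selector into
 * the high 16 bits and a dword register offset into the low 16 bits,
 * followed by a zero placeholder word; the bare 0x3/0x5 words appear to
 * introduce short blocks of indexed registers at the end of the list.
 */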
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

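/*
 * "Golden" register tables: triplets of {offset, and_mask, or_mask}
 * consumed by radeon_program_register_sequence(), which does a
 * read-modify-write unless the mask is 0xffffffff.
 */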
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};


static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

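/*
 * On NetBSD the doorbell aperture is mapped with bus_space, so the 32-bit
 * doorbell at @index lives at byte offset index * 4; the Linux path gets
 * the same scaling from pointer arithmetic on the u32 doorbell pointer.
 */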
/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
#ifdef __NetBSD__
		return bus_space_read_4(rdev->doorbell.bst, rdev->doorbell.bsh,
		    index*4);
#else
		return readl(rdev->doorbell.ptr + index);
#endif
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
#ifdef __NetBSD__
		bus_space_write_4(rdev->doorbell.bst, rdev->doorbell.bsh,
		    index*4, v);
#else
		writel(v, rdev->doorbell.ptr + index);
#endif
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}

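/*
 * MC sequencer I/O debug tables: each row is an {index, data} pair that
 * ci_mc_load_microcode() feeds through MC_SEQ_IO_DEBUG_INDEX/DATA while
 * the MC ucode is being loaded.
 */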
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

#define HAWAII_IO_MC_REGS_SIZE 22

static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};


1854 /**
1855 * cik_srbm_select - select specific register instances
1856 *
1857 * @rdev: radeon_device pointer
1858 * @me: selected ME (micro engine)
1859 * @pipe: pipe
1860 * @queue: queue
1861 * @vmid: VMID
1862 *
1863 * Switches the currently active register instances. Some
1864 * registers are instanced per VMID, others are instanced per
1865 * me/pipe/queue combination.
1866 */
1867 static void cik_srbm_select(struct radeon_device *rdev,
1868 u32 me, u32 pipe, u32 queue, u32 vmid)
1869 {
1870 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1871 MEID(me & 0x3) |
1872 VMID(vmid & 0xf) |
1873 QUEUEID(queue & 0x7));
1874 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1875 }
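
/*
 * Typical select/program/restore pattern around cik_srbm_select()
 * (a sketch of how callers in this file use it; rdev->srbm_mutex is
 * held while a non-default instance is selected):
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program the per-instance registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */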
1876
1877 /* ucode loading */
1878 /**
1879 * ci_mc_load_microcode - load MC ucode into the hw
1880 *
1881 * @rdev: radeon_device pointer
1882 *
1883 * Load the GDDR MC ucode into the hw (CIK).
1884 * Returns 0 on success, error on failure.
1885 */
1886 int ci_mc_load_microcode(struct radeon_device *rdev)
1887 {
1888 const __be32 *fw_data = NULL;
1889 const __le32 *new_fw_data = NULL;
1890 u32 running, tmp;
1891 const u32 *io_mc_regs = NULL;
1892 const __le32 *new_io_mc_regs = NULL;
1893 int i, regs_size, ucode_size;
1894
1895 if (!rdev->mc_fw)
1896 return -EINVAL;
1897
1898 if (rdev->new_fw) {
1899 const struct mc_firmware_header_v1_0 *hdr =
1900 (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1901
1902 radeon_ucode_print_mc_hdr(&hdr->header);
1903
1904 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1905 new_io_mc_regs = (const __le32 *)
1906 (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1907 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1908 new_fw_data = (const __le32 *)
1909 (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1910 } else {
1911 ucode_size = rdev->mc_fw->size / 4;
1912
1913 switch (rdev->family) {
1914 case CHIP_BONAIRE:
1915 io_mc_regs = &bonaire_io_mc_regs[0][0];
1916 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1917 break;
1918 case CHIP_HAWAII:
1919 io_mc_regs = &hawaii_io_mc_regs[0][0];
1920 regs_size = HAWAII_IO_MC_REGS_SIZE;
1921 break;
1922 default:
1923 return -EINVAL;
1924 }
1925 fw_data = (const __be32 *)rdev->mc_fw->data;
1926 }
1927
1928 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1929
1930 if (running == 0) {
1931 /* reset the engine and set to writable */
1932 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1933 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1934
1935 /* load mc io regs */
1936 for (i = 0; i < regs_size; i++) {
1937 if (rdev->new_fw) {
1938 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1939 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1940 } else {
1941 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1942 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1943 }
1944 }
1945
1946 tmp = RREG32(MC_SEQ_MISC0);
1947 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1948 WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1949 WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1950 WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1951 WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1952 }
1953
1954 /* load the MC ucode */
1955 for (i = 0; i < ucode_size; i++) {
1956 if (rdev->new_fw)
1957 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1958 else
1959 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1960 }
1961
1962 /* put the engine back into the active state */
1963 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1964 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1965 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1966
1967 /* wait for training to complete */
1968 for (i = 0; i < rdev->usec_timeout; i++) {
1969 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1970 break;
1971 udelay(1);
1972 }
1973 for (i = 0; i < rdev->usec_timeout; i++) {
1974 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1975 break;
1976 udelay(1);
1977 }
1978 }
1979
1980 return 0;
1981 }
1982
1983 /**
1984 * cik_init_microcode - load ucode images from disk
1985 *
1986 * @rdev: radeon_device pointer
1987 *
1988 * Use the firmware interface to load the ucode images into
1989 * the driver (not loaded into hw).
1990 * Returns 0 on success, error on failure.
1991 */
1992 static int cik_init_microcode(struct radeon_device *rdev)
1993 {
1994 const char *chip_name;
1995 const char *new_chip_name;
1996 size_t pfp_req_size, me_req_size, ce_req_size,
1997 mec_req_size, rlc_req_size, mc_req_size = 0,
1998 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1999 char fw_name[30];
2000 int new_fw = 0;
2001 int err;
2002 int num_fw;
2003 bool new_smc = false;
2004
2005 DRM_DEBUG("\n");
2006
2007 switch (rdev->family) {
2008 case CHIP_BONAIRE:
2009 chip_name = "BONAIRE";
2010 if ((rdev->pdev->revision == 0x80) ||
2011 (rdev->pdev->revision == 0x81) ||
2012 (rdev->pdev->device == 0x665f))
2013 new_smc = true;
2014 new_chip_name = "bonaire";
2015 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2016 me_req_size = CIK_ME_UCODE_SIZE * 4;
2017 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2018 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2019 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2020 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2021 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2022 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2023 smc_req_size = round_up(BONAIRE_SMC_UCODE_SIZE, 4);
2024 num_fw = 8;
2025 break;
2026 case CHIP_HAWAII:
2027 chip_name = "HAWAII";
2028 if (rdev->pdev->revision == 0x80)
2029 new_smc = true;
2030 new_chip_name = "hawaii";
2031 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2032 me_req_size = CIK_ME_UCODE_SIZE * 4;
2033 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2034 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2035 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2036 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2037 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2038 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2039 smc_req_size = round_up(HAWAII_SMC_UCODE_SIZE, 4);
2040 num_fw = 8;
2041 break;
2042 case CHIP_KAVERI:
2043 chip_name = "KAVERI";
2044 new_chip_name = "kaveri";
2045 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2046 me_req_size = CIK_ME_UCODE_SIZE * 4;
2047 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2048 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2049 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2050 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2051 num_fw = 7;
2052 break;
2053 case CHIP_KABINI:
2054 chip_name = "KABINI";
2055 new_chip_name = "kabini";
2056 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2057 me_req_size = CIK_ME_UCODE_SIZE * 4;
2058 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2059 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2060 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2061 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2062 num_fw = 6;
2063 break;
2064 case CHIP_MULLINS:
2065 chip_name = "MULLINS";
2066 new_chip_name = "mullins";
2067 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2068 me_req_size = CIK_ME_UCODE_SIZE * 4;
2069 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2070 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2071 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2072 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2073 num_fw = 6;
2074 break;
2075 default: BUG();
2076 }
2077
2078 DRM_INFO("Loading %s Microcode\n", new_chip_name);
2079
2080 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2081 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2082 if (err) {
2083 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2084 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2085 if (err)
2086 goto out;
2087 if (rdev->pfp_fw->size != pfp_req_size) {
2088 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2089 rdev->pfp_fw->size, fw_name);
2090 err = -EINVAL;
2091 goto out;
2092 }
2093 } else {
2094 err = radeon_ucode_validate(rdev->pfp_fw);
2095 if (err) {
2096 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2097 fw_name);
2098 goto out;
2099 } else {
2100 new_fw++;
2101 }
2102 }
2103
2104 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2105 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2106 if (err) {
2107 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2108 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2109 if (err)
2110 goto out;
2111 if (rdev->me_fw->size != me_req_size) {
2112 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2113 rdev->me_fw->size, fw_name);
2114 err = -EINVAL;
2115 }
2116 } else {
2117 err = radeon_ucode_validate(rdev->me_fw);
2118 if (err) {
2119 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2120 fw_name);
2121 goto out;
2122 } else {
2123 new_fw++;
2124 }
2125 }
2126
2127 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2128 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2129 if (err) {
2130 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2131 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2132 if (err)
2133 goto out;
2134 if (rdev->ce_fw->size != ce_req_size) {
2135 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2136 rdev->ce_fw->size, fw_name);
2137 err = -EINVAL;
2138 }
2139 } else {
2140 err = radeon_ucode_validate(rdev->ce_fw);
2141 if (err) {
2142 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2143 fw_name);
2144 goto out;
2145 } else {
2146 new_fw++;
2147 }
2148 }
2149
2150 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2151 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2152 if (err) {
2153 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2154 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2155 if (err)
2156 goto out;
2157 if (rdev->mec_fw->size != mec_req_size) {
2158 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2159 rdev->mec_fw->size, fw_name);
2160 err = -EINVAL;
2161 }
2162 } else {
2163 err = radeon_ucode_validate(rdev->mec_fw);
2164 if (err) {
2165 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2166 fw_name);
2167 goto out;
2168 } else {
2169 new_fw++;
2170 }
2171 }
2172
2173 if (rdev->family == CHIP_KAVERI) {
2174 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2175 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2176 if (err) {
2177 goto out;
2178 } else {
2179 err = radeon_ucode_validate(rdev->mec2_fw);
2180 if (err) {
2181 goto out;
2182 } else {
2183 new_fw++;
2184 }
2185 }
2186 }
2187
2188 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2189 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2190 if (err) {
2191 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2192 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2193 if (err)
2194 goto out;
2195 if (rdev->rlc_fw->size != rlc_req_size) {
2196 pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2197 rdev->rlc_fw->size, fw_name);
2198 err = -EINVAL;
2199 }
2200 } else {
2201 err = radeon_ucode_validate(rdev->rlc_fw);
2202 if (err) {
2203 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2204 fw_name);
2205 goto out;
2206 } else {
2207 new_fw++;
2208 }
2209 }
2210
2211 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2212 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2213 if (err) {
2214 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2215 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2216 if (err)
2217 goto out;
2218 if (rdev->sdma_fw->size != sdma_req_size) {
2219 pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2220 rdev->sdma_fw->size, fw_name);
2221 err = -EINVAL;
2222 }
2223 } else {
2224 err = radeon_ucode_validate(rdev->sdma_fw);
2225 if (err) {
2226 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2227 fw_name);
2228 goto out;
2229 } else {
2230 new_fw++;
2231 }
2232 }
2233
2234 /* No SMC, MC ucode on APUs */
2235 if (!(rdev->flags & RADEON_IS_IGP)) {
2236 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2237 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2238 if (err) {
2239 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2240 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2241 if (err) {
2242 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2243 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2244 if (err)
2245 goto out;
2246 }
2247 if ((rdev->mc_fw->size != mc_req_size) &&
2248 (rdev->mc_fw->size != mc2_req_size)) {
2249 pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2250 rdev->mc_fw->size, fw_name);
2251 err = -EINVAL;
2252 }
2253 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2254 } else {
2255 err = radeon_ucode_validate(rdev->mc_fw);
2256 if (err) {
2257 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2258 fw_name);
2259 goto out;
2260 } else {
2261 new_fw++;
2262 }
2263 }
2264
2265 if (new_smc)
2266 snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2267 else
2268 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2269 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2270 if (err) {
2271 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2272 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2273 if (err) {
2274 pr_err("smc: error loading firmware \"%s\"\n",
2275 fw_name);
2276 release_firmware(rdev->smc_fw);
2277 rdev->smc_fw = NULL;
2278 err = 0;
2279 } else if (rdev->smc_fw->size != smc_req_size) {
2280 pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2281 rdev->smc_fw->size, fw_name);
2282 err = -EINVAL;
2283 }
2284 } else {
2285 err = radeon_ucode_validate(rdev->smc_fw);
2286 if (err) {
2287 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2288 fw_name);
2289 goto out;
2290 } else {
2291 new_fw++;
2292 }
2293 }
2294 }
2295
2296 if (new_fw == 0) {
2297 rdev->new_fw = false;
2298 } else if (new_fw < num_fw) {
2299 pr_err("ci_fw: mixing new and old firmware!\n");
2300 err = -EINVAL;
2301 } else {
2302 rdev->new_fw = true;
2303 }
2304
2305 out:
2306 if (err) {
2307 if (err != -EINVAL)
2308 pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2309 fw_name);
2310 release_firmware(rdev->pfp_fw);
2311 rdev->pfp_fw = NULL;
2312 release_firmware(rdev->me_fw);
2313 rdev->me_fw = NULL;
2314 release_firmware(rdev->ce_fw);
2315 rdev->ce_fw = NULL;
2316 release_firmware(rdev->mec_fw);
2317 rdev->mec_fw = NULL;
2318 release_firmware(rdev->mec2_fw);
2319 rdev->mec2_fw = NULL;
2320 release_firmware(rdev->rlc_fw);
2321 rdev->rlc_fw = NULL;
2322 release_firmware(rdev->sdma_fw);
2323 rdev->sdma_fw = NULL;
2324 release_firmware(rdev->mc_fw);
2325 rdev->mc_fw = NULL;
2326 release_firmware(rdev->smc_fw);
2327 rdev->smc_fw = NULL;
2328 }
2329 return err;
2330 }
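
/*
 * Firmware lookup order used by cik_init_microcode(), illustrated
 * with the Bonaire PFP image (a worked example of the logic above,
 * not additional behaviour):
 *
 *	1. "radeon/bonaire_pfp.bin" - new layout, radeon_ucode_validate()
 *	2. "radeon/BONAIRE_pfp.bin" - legacy layout, raw size check only
 *
 * All images must come from the same generation: new_fw counts the
 * validated ones, and anything between 0 and num_fw is rejected as
 * "mixing new and old firmware".
 */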
2331
2332 /*
2333 * Core functions
2334 */
2335 /**
2336 * cik_tiling_mode_table_init - init the hw tiling table
2337 *
2338 * @rdev: radeon_device pointer
2339 *
2340 * Starting with SI, the tiling setup is done globally in a
2341 * set of 32 tiling modes. Rather than selecting each set of
2342 * parameters per surface as on older asics, we just select
2343 * which index in the tiling table we want to use, and the
2344 * surface uses those parameters (CIK).
2345 */
2346 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2347 {
2348 u32 *tile = rdev->config.cik.tile_mode_array;
2349 u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2350 const u32 num_tile_mode_states =
2351 ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2352 const u32 num_secondary_tile_mode_states =
2353 ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2354 u32 reg_offset, split_equal_to_row_size;
2355 u32 num_pipe_configs;
2356 u32 num_rbs = rdev->config.cik.max_backends_per_se *
2357 rdev->config.cik.max_shader_engines;
2358
2359 switch (rdev->config.cik.mem_row_size_in_kb) {
2360 case 1:
2361 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2362 break;
2363 case 2:
2364 default:
2365 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2366 break;
2367 case 4:
2368 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2369 break;
2370 }
2371
2372 num_pipe_configs = rdev->config.cik.max_tile_pipes;
2373 if (num_pipe_configs > 8)
2374 num_pipe_configs = 16;
2375
2376 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2377 tile[reg_offset] = 0;
2378 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2379 macrotile[reg_offset] = 0;
2380
2381 switch (num_pipe_configs) {
2382 case 16:
2383 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2384 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2385 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2387 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2388 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2389 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2391 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2392 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2393 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2395 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2396 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2397 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2399 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2401 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402 TILE_SPLIT(split_equal_to_row_size));
2403 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2406 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2407 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2408 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2410 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2411 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2412 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413 TILE_SPLIT(split_equal_to_row_size));
2414 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2415 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2416 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2417 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2419 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2420 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2421 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2423 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2425 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2428 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2429 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2432 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2434 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2435 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2436 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2439 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2440 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2444 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2447 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2448 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2449 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2450 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2451 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2452 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2453 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2454 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2455 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2456 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2458 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2459 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2460 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2461
2462 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2464 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2465 NUM_BANKS(ADDR_SURF_16_BANK));
2466 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2468 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2469 NUM_BANKS(ADDR_SURF_16_BANK));
2470 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2472 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2473 NUM_BANKS(ADDR_SURF_16_BANK));
2474 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2477 NUM_BANKS(ADDR_SURF_16_BANK));
2478 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2481 NUM_BANKS(ADDR_SURF_8_BANK));
2482 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2484 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2485 NUM_BANKS(ADDR_SURF_4_BANK));
2486 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2488 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2489 NUM_BANKS(ADDR_SURF_2_BANK));
2490 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2493 NUM_BANKS(ADDR_SURF_16_BANK));
2494 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2496 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2497 NUM_BANKS(ADDR_SURF_16_BANK));
2498 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2501 NUM_BANKS(ADDR_SURF_16_BANK));
2502 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2504 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2505 NUM_BANKS(ADDR_SURF_8_BANK));
2506 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2508 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2509 NUM_BANKS(ADDR_SURF_4_BANK));
2510 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2513 NUM_BANKS(ADDR_SURF_2_BANK));
2514 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2516 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2517 NUM_BANKS(ADDR_SURF_2_BANK));
2518
2519 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2520 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2521 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2522 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2523 break;
2524
2525 case 8:
2526 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2527 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2528 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2529 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2530 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2531 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2532 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2533 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2534 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2535 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2536 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2537 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2538 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2539 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2540 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2541 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2542 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2543 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2544 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2545 TILE_SPLIT(split_equal_to_row_size));
2546 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2549 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2550 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2551 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2553 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2554 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2555 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556 TILE_SPLIT(split_equal_to_row_size));
2557 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2558 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2559 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2560 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2561 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2562 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2564 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2566 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2567 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2568 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2569 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2571 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2572 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2573 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2574 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2575 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2577 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2578 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2579 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2581 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2582 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2583 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2584 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2585 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2586 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2587 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2589 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2590 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2592 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2593 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2594 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2596 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2597 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2598 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2599 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2600 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2601 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2602 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2604
2605 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2606 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2607 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2608 NUM_BANKS(ADDR_SURF_16_BANK));
2609 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2611 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2612 NUM_BANKS(ADDR_SURF_16_BANK));
2613 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2614 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2615 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2616 NUM_BANKS(ADDR_SURF_16_BANK));
2617 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2618 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2619 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2620 NUM_BANKS(ADDR_SURF_16_BANK));
2621 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2622 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2623 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2624 NUM_BANKS(ADDR_SURF_8_BANK));
2625 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2626 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2627 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2628 NUM_BANKS(ADDR_SURF_4_BANK));
2629 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2630 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2631 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2632 NUM_BANKS(ADDR_SURF_2_BANK));
2633 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2635 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2636 NUM_BANKS(ADDR_SURF_16_BANK));
2637 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2639 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2640 NUM_BANKS(ADDR_SURF_16_BANK));
2641 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2642 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2643 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2644 NUM_BANKS(ADDR_SURF_16_BANK));
2645 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2646 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2647 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2648 NUM_BANKS(ADDR_SURF_16_BANK));
2649 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2651 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2652 NUM_BANKS(ADDR_SURF_8_BANK));
2653 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2654 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2655 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2656 NUM_BANKS(ADDR_SURF_4_BANK));
2657 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2658 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2659 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2660 NUM_BANKS(ADDR_SURF_2_BANK));
2661
2662 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2663 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2664 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2665 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2666 break;
2667
2668 case 4:
2669 if (num_rbs == 4) {
2670 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2671 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2672 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2673 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2674 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2676 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2677 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2678 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2679 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2680 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2681 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2682 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2683 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2684 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2685 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2686 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2687 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2688 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2689 TILE_SPLIT(split_equal_to_row_size));
2690 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2691 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2694 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2695 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2697 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2698 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2699 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700 TILE_SPLIT(split_equal_to_row_size));
2701 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2702 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2703 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2704 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2706 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2707 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2708 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2710 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2711 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2712 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2713 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2714 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2715 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2716 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2718 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2719 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2721 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2722 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2723 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2725 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2726 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2727 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2728 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2729 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2730 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2731 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2733 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2734 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2736 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2737 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2738 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2739 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2740 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2741 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2742 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2744 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2745 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2746 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2747 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2748
2749 } else if (num_rbs < 4) {
2750 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2751 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2752 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2753 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2754 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2755 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2756 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2757 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2758 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2759 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2760 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2761 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2762 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2763 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2764 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2765 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2766 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2767 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2768 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2769 TILE_SPLIT(split_equal_to_row_size));
2770 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2771 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2773 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2774 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2775 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2777 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2778 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2779 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780 TILE_SPLIT(split_equal_to_row_size));
2781 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2782 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2783 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2784 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2785 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2786 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2787 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2788 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2789 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2790 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2791 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2792 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2793 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2794 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2795 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2796 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2797 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2798 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2799 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2800 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2801 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2802 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2803 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2804 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2805 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2806 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2807 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2808 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2809 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2810 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2811 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2812 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2813 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2814 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2815 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2816 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2817 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2818 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2819 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2820 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2821 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2822 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2823 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2824 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2825 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2826 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2827 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2828 }
2829
2830 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2831 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2832 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2833 NUM_BANKS(ADDR_SURF_16_BANK));
2834 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2836 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2837 NUM_BANKS(ADDR_SURF_16_BANK));
2838 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2840 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2841 NUM_BANKS(ADDR_SURF_16_BANK));
2842 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2844 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2845 NUM_BANKS(ADDR_SURF_16_BANK));
2846 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2847 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2848 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2849 NUM_BANKS(ADDR_SURF_16_BANK));
2850 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2851 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2852 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2853 NUM_BANKS(ADDR_SURF_8_BANK));
2854 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2855 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2856 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2857 NUM_BANKS(ADDR_SURF_4_BANK));
2858 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2859 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2860 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2861 NUM_BANKS(ADDR_SURF_16_BANK));
2862 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2863 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2864 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2865 NUM_BANKS(ADDR_SURF_16_BANK));
2866 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2867 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2868 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2869 NUM_BANKS(ADDR_SURF_16_BANK));
2870 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2872 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2873 NUM_BANKS(ADDR_SURF_16_BANK));
2874 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2875 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2876 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2877 NUM_BANKS(ADDR_SURF_16_BANK));
2878 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2879 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2880 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2881 NUM_BANKS(ADDR_SURF_8_BANK));
2882 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2883 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2884 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2885 NUM_BANKS(ADDR_SURF_4_BANK));
2886
2887 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2888 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2889 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2890 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2891 break;
2892
2893 case 2:
2894 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2895 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2896 PIPE_CONFIG(ADDR_SURF_P2) |
2897 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2898 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2899 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2900 PIPE_CONFIG(ADDR_SURF_P2) |
2901 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2902 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2903 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2904 PIPE_CONFIG(ADDR_SURF_P2) |
2905 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2906 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2907 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2908 PIPE_CONFIG(ADDR_SURF_P2) |
2909 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2910 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2912 PIPE_CONFIG(ADDR_SURF_P2) |
2913 TILE_SPLIT(split_equal_to_row_size));
2914 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2915 PIPE_CONFIG(ADDR_SURF_P2) |
2916 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2917 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2918 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2919 PIPE_CONFIG(ADDR_SURF_P2) |
2920 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2921 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2922 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2923 PIPE_CONFIG(ADDR_SURF_P2) |
2924 TILE_SPLIT(split_equal_to_row_size));
2925 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2926 PIPE_CONFIG(ADDR_SURF_P2));
2927 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2928 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2929 PIPE_CONFIG(ADDR_SURF_P2));
2930 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2931 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2932 PIPE_CONFIG(ADDR_SURF_P2) |
2933 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2934 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2935 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2936 PIPE_CONFIG(ADDR_SURF_P2) |
2937 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2939 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2940 PIPE_CONFIG(ADDR_SURF_P2) |
2941 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2943 PIPE_CONFIG(ADDR_SURF_P2) |
2944 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2945 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2946 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2947 PIPE_CONFIG(ADDR_SURF_P2) |
2948 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2951 PIPE_CONFIG(ADDR_SURF_P2) |
2952 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2953 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2954 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2955 PIPE_CONFIG(ADDR_SURF_P2) |
2956 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2957 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2958 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2959 PIPE_CONFIG(ADDR_SURF_P2));
2960 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2961 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2962 PIPE_CONFIG(ADDR_SURF_P2) |
2963 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2964 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2965 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2966 PIPE_CONFIG(ADDR_SURF_P2) |
2967 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2968 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2969 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2970 PIPE_CONFIG(ADDR_SURF_P2) |
2971 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2972
2973 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2974 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2975 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2976 NUM_BANKS(ADDR_SURF_16_BANK));
2977 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2978 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2979 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2980 NUM_BANKS(ADDR_SURF_16_BANK));
2981 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2982 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2983 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2984 NUM_BANKS(ADDR_SURF_16_BANK));
2985 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2986 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2987 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2988 NUM_BANKS(ADDR_SURF_16_BANK));
2989 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2990 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2991 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2992 NUM_BANKS(ADDR_SURF_16_BANK));
2993 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2994 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2995 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2996 NUM_BANKS(ADDR_SURF_16_BANK));
2997 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2998 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2999 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3000 NUM_BANKS(ADDR_SURF_8_BANK));
3001 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3002 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3003 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3004 NUM_BANKS(ADDR_SURF_16_BANK));
3005 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3006 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3007 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3008 NUM_BANKS(ADDR_SURF_16_BANK));
3009 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3010 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3011 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3012 NUM_BANKS(ADDR_SURF_16_BANK));
3013 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3014 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3015 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3016 NUM_BANKS(ADDR_SURF_16_BANK));
3017 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3018 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3019 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3020 NUM_BANKS(ADDR_SURF_16_BANK));
3021 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3023 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3024 NUM_BANKS(ADDR_SURF_16_BANK));
3025 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3027 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3028 NUM_BANKS(ADDR_SURF_8_BANK));
3029
3030 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3031 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3032 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3033 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3034 break;
3035
3036 default:
3037 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3038 }
3039 }
3040
3041 /**
3042 * cik_select_se_sh - select which SE, SH to address
3043 *
3044 * @rdev: radeon_device pointer
3045 * @se_num: shader engine to address
3046 * @sh_num: sh block to address
3047 *
3048 * Select which SE, SH combinations to address. Certain
3049 * registers are instanced per SE or SH. 0xffffffff means
3050 * broadcast to all SEs or SHs (CIK).
3051 */
3052 static void cik_select_se_sh(struct radeon_device *rdev,
3053 u32 se_num, u32 sh_num)
3054 {
3055 u32 data = INSTANCE_BROADCAST_WRITES;
3056
3057 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3058 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3059 else if (se_num == 0xffffffff)
3060 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3061 else if (sh_num == 0xffffffff)
3062 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3063 else
3064 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3065 WREG32(GRBM_GFX_INDEX, data);
3066 }
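
/*
 * Illustrative use of cik_select_se_sh() (sketch only): select SE 1
 * with all of its SH blocks, read an instanced register, then restore
 * full broadcast so later writes reach every instance:
 *
 *	cik_select_se_sh(rdev, 1, 0xffffffff);
 *	data = RREG32(CC_RB_BACKEND_DISABLE);
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */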
3067
3068 /**
3069 * cik_create_bitmask - create a bitmask
3070 *
3071 * @bit_width: length of the mask
3072 *
3073 * Create a variable length bit mask (CIK).
3074 * Returns the bitmask.
3075 */
3076 static u32 cik_create_bitmask(u32 bit_width)
3077 {
3078 u32 i, mask = 0;
3079
3080 for (i = 0; i < bit_width; i++) {
3081 mask <<= 1;
3082 mask |= 1;
3083 }
3084 return mask;
3085 }
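
/*
 * Equivalent closed form, noted for clarity (the loop above is kept
 * as-is since it also yields 0xffffffff for bit_width == 32 without
 * relying on a full-width shift):
 *
 *	mask = (bit_width >= 32) ? 0xffffffff : ((1u << bit_width) - 1);
 *
 * e.g. cik_create_bitmask(4) == 0xf.
 */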
3086
3087 /**
3088 * cik_get_rb_disabled - computes the mask of disabled RBs
3089 *
3090 * @rdev: radeon_device pointer
3091 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3093 * @sh_per_se: number of SH blocks per SE for the asic
3094 *
3095 * Calculates the bitmask of disabled RBs (CIK).
3096 * Returns the disabled RB bitmask.
3097 */
3098 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3099 u32 max_rb_num_per_se,
3100 u32 sh_per_se)
3101 {
3102 u32 data, mask;
3103
3104 data = RREG32(CC_RB_BACKEND_DISABLE);
3105 if (data & 1)
3106 data &= BACKEND_DISABLE_MASK;
3107 else
3108 data = 0;
3109 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3110
3111 data >>= BACKEND_DISABLE_SHIFT;
3112
3113 mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3114
3115 return data & mask;
3116 }
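
/*
 * Worked example with hypothetical values: for max_rb_num_per_se = 4
 * and sh_per_se = 1 the mask is 0xf, so a BACKEND_DISABLE field that
 * reads back 0x3 reports RBs 0 and 1 of the selected SE/SH instance
 * as disabled.
 */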
3117
3118 /**
3119 * cik_setup_rb - setup the RBs on the asic
3120 *
3121 * @rdev: radeon_device pointer
3122 * @se_num: number of SEs (shader engines) for the asic
3123 * @sh_per_se: number of SH blocks per SE for the asic
3124 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3125 *
3126 * Configures per-SE/SH RB registers (CIK).
3127 */
3128 static void cik_setup_rb(struct radeon_device *rdev,
3129 u32 se_num, u32 sh_per_se,
3130 u32 max_rb_num_per_se)
3131 {
3132 int i, j;
3133 u32 data, mask;
3134 u32 disabled_rbs = 0;
3135 u32 enabled_rbs = 0;
3136
3137 for (i = 0; i < se_num; i++) {
3138 for (j = 0; j < sh_per_se; j++) {
3139 cik_select_se_sh(rdev, i, j);
3140 data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3141 if (rdev->family == CHIP_HAWAII)
3142 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3143 else
3144 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3145 }
3146 }
3147 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3148
3149 mask = 1;
3150 for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3151 if (!(disabled_rbs & mask))
3152 enabled_rbs |= mask;
3153 mask <<= 1;
3154 }
3155
3156 rdev->config.cik.backend_enable_mask = enabled_rbs;
3157
3158 for (i = 0; i < se_num; i++) {
3159 cik_select_se_sh(rdev, i, 0xffffffff);
3160 data = 0;
3161 for (j = 0; j < sh_per_se; j++) {
3162 switch (enabled_rbs & 3) {
3163 case 0:
3164 if (j == 0)
3165 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3166 else
3167 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3168 break;
3169 case 1:
3170 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3171 break;
3172 case 2:
3173 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3174 break;
3175 case 3:
3176 default:
3177 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3178 break;
3179 }
3180 enabled_rbs >>= 2;
3181 }
3182 WREG32(PA_SC_RASTER_CONFIG, data);
3183 }
3184 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3185 }
3186
3187 /**
3188 * cik_gpu_init - setup the 3D engine
3189 *
3190 * @rdev: radeon_device pointer
3191 *
3192 * Configures the 3D engine and tiling configuration
3193 * registers so that the 3D engine is usable.
3194 */
3195 static void cik_gpu_init(struct radeon_device *rdev)
3196 {
3197 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3198 u32 mc_shared_chmap __unused, mc_arb_ramcfg;
3199 u32 hdp_host_path_cntl;
3200 u32 tmp;
3201 int i, j;
3202
3203 switch (rdev->family) {
3204 case CHIP_BONAIRE:
3205 rdev->config.cik.max_shader_engines = 2;
3206 rdev->config.cik.max_tile_pipes = 4;
3207 rdev->config.cik.max_cu_per_sh = 7;
3208 rdev->config.cik.max_sh_per_se = 1;
3209 rdev->config.cik.max_backends_per_se = 2;
3210 rdev->config.cik.max_texture_channel_caches = 4;
3211 rdev->config.cik.max_gprs = 256;
3212 rdev->config.cik.max_gs_threads = 32;
3213 rdev->config.cik.max_hw_contexts = 8;
3214
3215 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3216 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3217 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3218 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3219 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3220 break;
3221 case CHIP_HAWAII:
3222 rdev->config.cik.max_shader_engines = 4;
3223 rdev->config.cik.max_tile_pipes = 16;
3224 rdev->config.cik.max_cu_per_sh = 11;
3225 rdev->config.cik.max_sh_per_se = 1;
3226 rdev->config.cik.max_backends_per_se = 4;
3227 rdev->config.cik.max_texture_channel_caches = 16;
3228 rdev->config.cik.max_gprs = 256;
3229 rdev->config.cik.max_gs_threads = 32;
3230 rdev->config.cik.max_hw_contexts = 8;
3231
3232 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3233 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3234 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3235 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3236 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3237 break;
3238 case CHIP_KAVERI:
3239 rdev->config.cik.max_shader_engines = 1;
3240 rdev->config.cik.max_tile_pipes = 4;
3241 rdev->config.cik.max_cu_per_sh = 8;
3242 rdev->config.cik.max_backends_per_se = 2;
3243 rdev->config.cik.max_sh_per_se = 1;
3244 rdev->config.cik.max_texture_channel_caches = 4;
3245 rdev->config.cik.max_gprs = 256;
3246 rdev->config.cik.max_gs_threads = 16;
3247 rdev->config.cik.max_hw_contexts = 8;
3248
3249 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3250 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3251 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3252 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3253 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3254 break;
3255 case CHIP_KABINI:
3256 case CHIP_MULLINS:
3257 default:
3258 rdev->config.cik.max_shader_engines = 1;
3259 rdev->config.cik.max_tile_pipes = 2;
3260 rdev->config.cik.max_cu_per_sh = 2;
3261 rdev->config.cik.max_sh_per_se = 1;
3262 rdev->config.cik.max_backends_per_se = 1;
3263 rdev->config.cik.max_texture_channel_caches = 2;
3264 rdev->config.cik.max_gprs = 256;
3265 rdev->config.cik.max_gs_threads = 16;
3266 rdev->config.cik.max_hw_contexts = 8;
3267
3268 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3269 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3270 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3271 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3272 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3273 break;
3274 }
3275
3276 /* Initialize HDP */
3277 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3278 WREG32((0x2c14 + j), 0x00000000);
3279 WREG32((0x2c18 + j), 0x00000000);
3280 WREG32((0x2c1c + j), 0x00000000);
3281 WREG32((0x2c20 + j), 0x00000000);
3282 WREG32((0x2c24 + j), 0x00000000);
3283 }
3284
3285 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3286 WREG32(SRBM_INT_CNTL, 0x1);
3287 WREG32(SRBM_INT_ACK, 0x1);
3288
3289 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3290
3291 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3292 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3293
3294 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3295 rdev->config.cik.mem_max_burst_length_bytes = 256;
3296 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3297 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3298 if (rdev->config.cik.mem_row_size_in_kb > 4)
3299 rdev->config.cik.mem_row_size_in_kb = 4;
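/* e.g. NOOFCOLS = 0 gives (4 * 2^8) / 1024 = 1 KB; NOOFCOLS = 2
 * would give 4 KB, the clamp ceiling above. */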
3300 /* XXX use MC settings? */
3301 rdev->config.cik.shader_engine_tile_size = 32;
3302 rdev->config.cik.num_gpus = 1;
3303 rdev->config.cik.multi_gpu_tile_size = 64;
3304
3305 /* fix up row size */
3306 gb_addr_config &= ~ROW_SIZE_MASK;
3307 switch (rdev->config.cik.mem_row_size_in_kb) {
3308 case 1:
3309 default:
3310 gb_addr_config |= ROW_SIZE(0);
3311 break;
3312 case 2:
3313 gb_addr_config |= ROW_SIZE(1);
3314 break;
3315 case 4:
3316 gb_addr_config |= ROW_SIZE(2);
3317 break;
3318 }
3319
3320 /* setup tiling info dword. gb_addr_config is not adequate since it does
3321 * not have bank info, so create a custom tiling dword.
3322 * bits 3:0 num_pipes
3323 * bits 7:4 num_banks
3324 * bits 11:8 group_size
3325 * bits 15:12 row_size
3326 */
3327 rdev->config.cik.tile_config = 0;
3328 switch (rdev->config.cik.num_tile_pipes) {
3329 case 1:
3330 rdev->config.cik.tile_config |= (0 << 0);
3331 break;
3332 case 2:
3333 rdev->config.cik.tile_config |= (1 << 0);
3334 break;
3335 case 4:
3336 rdev->config.cik.tile_config |= (2 << 0);
3337 break;
3338 case 8:
3339 default:
3340 /* XXX what about 12? */
3341 rdev->config.cik.tile_config |= (3 << 0);
3342 break;
3343 }
3344 rdev->config.cik.tile_config |=
3345 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3346 rdev->config.cik.tile_config |=
3347 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3348 rdev->config.cik.tile_config |=
3349 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
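/*
 * Consumers decode this dword per the layout documented above; for
 * example num_pipes is recovered as 1 << (tile_config & 0xf), matching
 * the switch above (0 -> 1 pipe, 3 -> 8 pipes). A sketch only; the
 * bank, group and row fields carry the raw register encodings.
 */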
3350
3351 WREG32(GB_ADDR_CONFIG, gb_addr_config);
3352 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3353 WREG32(DMIF_ADDR_CALC, gb_addr_config);
3354 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3355 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3356 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3357 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3358 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3359
3360 cik_tiling_mode_table_init(rdev);
3361
3362 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3363 rdev->config.cik.max_sh_per_se,
3364 rdev->config.cik.max_backends_per_se);
3365
3366 rdev->config.cik.active_cus = 0;
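/* Count enabled CUs: popcount of each SE/SH active-CU bitmap. */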
3367 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3368 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3369 rdev->config.cik.active_cus +=
3370 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3371 }
3372 }
3373
3374 /* set HW defaults for 3D engine */
3375 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3376
3377 WREG32(SX_DEBUG_1, 0x20);
3378
3379 WREG32(TA_CNTL_AUX, 0x00010000);
3380
3381 tmp = RREG32(SPI_CONFIG_CNTL);
3382 tmp |= 0x03000000;
3383 WREG32(SPI_CONFIG_CNTL, tmp);
3384
3385 WREG32(SQ_CONFIG, 1);
3386
3387 WREG32(DB_DEBUG, 0);
3388
3389 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3390 tmp |= 0x00000400;
3391 WREG32(DB_DEBUG2, tmp);
3392
3393 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3394 tmp |= 0x00020200;
3395 WREG32(DB_DEBUG3, tmp);
3396
3397 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3398 tmp |= 0x00018208;
3399 WREG32(CB_HW_CONTROL, tmp);
3400
3401 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3402
3403 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3404 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3405 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3406 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3407
3408 WREG32(VGT_NUM_INSTANCES, 1);
3409
3410 WREG32(CP_PERFMON_CNTL, 0);
3411
3412 WREG32(SQ_CONFIG, 0);
3413
3414 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3415 FORCE_EOV_MAX_REZ_CNT(255)));
3416
3417 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3418 AUTO_INVLD_EN(ES_AND_GS_AUTO));
3419
3420 WREG32(VGT_GS_VERTEX_REUSE, 16);
3421 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3422
3423 tmp = RREG32(HDP_MISC_CNTL);
3424 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3425 WREG32(HDP_MISC_CNTL, tmp);
3426
3427 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3428 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3429
3430 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3431 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3432
3433 udelay(50);
3434 }
3435
3436 /*
3437 * GPU scratch registers helper functions.
3438 */
3439 /**
3440 * cik_scratch_init - setup driver info for CP scratch regs
3441 *
3442 * @rdev: radeon_device pointer
3443 *
3444 * Set up the number and offset of the CP scratch registers.
3445 * NOTE: use of CP scratch registers is a legacy interface and
3446 * is not used by default on newer asics (r6xx+). On newer asics,
3447 * memory buffers are used for fences rather than scratch regs.
3448 */
3449 static void cik_scratch_init(struct radeon_device *rdev)
3450 {
3451 int i;
3452
3453 rdev->scratch.num_reg = 7;
3454 rdev->scratch.reg_base = SCRATCH_REG0;
3455 for (i = 0; i < rdev->scratch.num_reg; i++) {
3456 rdev->scratch.free[i] = true;
3457 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3458 }
3459 }
3460
3461 /**
3462 * cik_ring_test - basic gfx ring test
3463 *
3464 * @rdev: radeon_device pointer
3465 * @ring: radeon_ring structure holding ring information
3466 *
3467 * Allocate a scratch register and write to it using the gfx ring (CIK).
3468 * Provides a basic gfx ring test to verify that the ring is working.
3469 * Used by cik_cp_gfx_resume().
3470 * Returns 0 on success, error on failure.
3471 */
3472 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3473 {
3474 uint32_t scratch;
3475 uint32_t tmp = 0;
3476 unsigned i;
3477 int r;
3478
3479 r = radeon_scratch_get(rdev, &scratch);
3480 if (r) {
3481 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3482 return r;
3483 }
3484 WREG32(scratch, 0xCAFEDEAD);
3485 r = radeon_ring_lock(rdev, ring, 3);
3486 if (r) {
3487 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3488 radeon_scratch_free(rdev, scratch);
3489 return r;
3490 }
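/* 3 dwords: packet header, register offset (in dwords from the
 * UCONFIG base), and the value to write. */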
3491 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3492 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3493 radeon_ring_write(ring, 0xDEADBEEF);
3494 radeon_ring_unlock_commit(rdev, ring, false);
3495
3496 for (i = 0; i < rdev->usec_timeout; i++) {
3497 tmp = RREG32(scratch);
3498 if (tmp == 0xDEADBEEF)
3499 break;
3500 udelay(1);
3501 }
3502 if (i < rdev->usec_timeout) {
3503 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3504 } else {
3505 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3506 ring->idx, scratch, tmp);
3507 r = -EINVAL;
3508 }
3509 radeon_scratch_free(rdev, scratch);
3510 return r;
3511 }
3512
3513 /**
3514 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3515 *
3516 * @rdev: radeon_device pointer
3517 * @ridx: radeon ring index
3518 *
3519 * Emits an hdp flush on the cp.
3520 */
3521 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3522 int ridx)
3523 {
3524 struct radeon_ring *ring = &rdev->ring[ridx];
3525 u32 ref_and_mask;
3526
3527 switch (ring->idx) {
3528 case CAYMAN_RING_TYPE_CP1_INDEX:
3529 case CAYMAN_RING_TYPE_CP2_INDEX:
3530 default:
3531 switch (ring->me) {
3532 case 0:
3533 ref_and_mask = CP2 << ring->pipe;
3534 break;
3535 case 1:
3536 ref_and_mask = CP6 << ring->pipe;
3537 break;
3538 default:
3539 return;
3540 }
3541 break;
3542 case RADEON_RING_TYPE_GFX_INDEX:
3543 ref_and_mask = CP0;
3544 break;
3545 }
3546
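/* Each bit in GPU_HDP_FLUSH_REQ/_DONE belongs to one requester;
 * CPn << pipe selects the bit for this queue's compute pipe. */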
3547 radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3548 radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3549 WAIT_REG_MEM_FUNCTION(3) | /* == */
3550 WAIT_REG_MEM_ENGINE(1))); /* pfp */
3551 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3552 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3553 radeon_ring_write(ring, ref_and_mask);
3554 radeon_ring_write(ring, ref_and_mask);
3555 radeon_ring_write(ring, 0x20); /* poll interval */
3556 }
3557
3558 /**
3559 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3560 *
3561 * @rdev: radeon_device pointer
3562 * @fence: radeon fence object
3563 *
3564 * Emits a fence sequence number on the gfx ring and flushes
3565 * GPU caches.
3566 */
3567 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3568 struct radeon_fence *fence)
3569 {
3570 struct radeon_ring *ring = &rdev->ring[fence->ring];
3571 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3572
3573 /* Workaround for cache flush problems. First send a dummy EOP
3574 * event down the pipe with a sequence number one below the real one.
3575 */
3576 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3577 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3578 EOP_TC_ACTION_EN |
3579 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3580 EVENT_INDEX(5)));
3581 radeon_ring_write(ring, addr & 0xfffffffc);
3582 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3583 DATA_SEL(1) | INT_SEL(0));
3584 radeon_ring_write(ring, fence->seq - 1);
3585 radeon_ring_write(ring, 0);
3586
3587 /* Then send the real EOP event down the pipe. */
3588 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3589 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3590 EOP_TC_ACTION_EN |
3591 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3592 EVENT_INDEX(5)));
3593 radeon_ring_write(ring, addr & 0xfffffffc);
3594 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3595 radeon_ring_write(ring, fence->seq);
3596 radeon_ring_write(ring, 0);
3597 }
3598
3599 /**
3600 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3601 *
3602 * @rdev: radeon_device pointer
3603 * @fence: radeon fence object
3604 *
3605 * Emits a fence sequence number on the compute ring and flushes
3606 * GPU caches.
3607 */
3608 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3609 struct radeon_fence *fence)
3610 {
3611 struct radeon_ring *ring = &rdev->ring[fence->ring];
3612 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3613
3614 /* RELEASE_MEM - flush caches, send int */
3615 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3616 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3617 EOP_TC_ACTION_EN |
3618 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3619 EVENT_INDEX(5)));
3620 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3621 radeon_ring_write(ring, addr & 0xfffffffc);
3622 radeon_ring_write(ring, upper_32_bits(addr));
3623 radeon_ring_write(ring, fence->seq);
3624 radeon_ring_write(ring, 0);
3625 }
3626
3627 /**
3628 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3629 *
3630 * @rdev: radeon_device pointer
3631 * @ring: radeon ring buffer object
3632 * @semaphore: radeon semaphore object
3633 * @emit_wait: Is this a semaphore wait?
3634 *
3635 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3636 * from running ahead of semaphore waits.
3637 */
3638 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3639 struct radeon_ring *ring,
3640 struct radeon_semaphore *semaphore,
3641 bool emit_wait)
3642 {
3643 uint64_t addr = semaphore->gpu_addr;
3644 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3645
3646 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3647 radeon_ring_write(ring, lower_32_bits(addr));
3648 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3649
3650 if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3651 /* Prevent the PFP from running ahead of the semaphore wait */
3652 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3653 radeon_ring_write(ring, 0x0);
3654 }
3655
3656 return true;
3657 }
3658
3659 /**
3660 * cik_copy_cpdma - copy pages using the CP DMA engine
3661 *
3662 * @rdev: radeon_device pointer
3663 * @src_offset: src GPU address
3664 * @dst_offset: dst GPU address
3665 * @num_gpu_pages: number of GPU pages to xfer
3666 * @resv: reservation object to sync to
3667 *
3668 * Copy GPU pages using the CP DMA engine (CIK+).
3669 * Used by the radeon ttm implementation to move pages if
3670 * registered as the asic copy callback.
3671 */
3672 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3673 uint64_t src_offset, uint64_t dst_offset,
3674 unsigned num_gpu_pages,
3675 struct dma_resv *resv)
3676 {
3677 struct radeon_fence *fence;
3678 struct radeon_sync sync;
3679 int ring_index = rdev->asic->copy.blit_ring_index;
3680 struct radeon_ring *ring = &rdev->ring[ring_index];
3681 u32 size_in_bytes, cur_size_in_bytes, control;
3682 int i, num_loops;
3683 int r = 0;
3684
3685 radeon_sync_create(&sync);
3686
3687 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3688 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
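/* 7 dwords per DMA_DATA packet, plus 18 reserved for the sync
 * packets and the fence emitted around the copy loop. */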
3689 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3690 if (r) {
3691 DRM_ERROR("radeon: moving bo (%d).\n", r);
3692 radeon_sync_free(rdev, &sync, NULL);
3693 return ERR_PTR(r);
3694 }
3695
3696 radeon_sync_resv(rdev, &sync, resv, false);
3697 radeon_sync_rings(rdev, &sync, ring->idx);
3698
3699 for (i = 0; i < num_loops; i++) {
3700 cur_size_in_bytes = size_in_bytes;
3701 if (cur_size_in_bytes > 0x1fffff)
3702 cur_size_in_bytes = 0x1fffff;
3703 size_in_bytes -= cur_size_in_bytes;
3704 control = 0;
3705 if (size_in_bytes == 0)
3706 control |= PACKET3_DMA_DATA_CP_SYNC;
3707 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3708 radeon_ring_write(ring, control);
3709 radeon_ring_write(ring, lower_32_bits(src_offset));
3710 radeon_ring_write(ring, upper_32_bits(src_offset));
3711 radeon_ring_write(ring, lower_32_bits(dst_offset));
3712 radeon_ring_write(ring, upper_32_bits(dst_offset));
3713 radeon_ring_write(ring, cur_size_in_bytes);
3714 src_offset += cur_size_in_bytes;
3715 dst_offset += cur_size_in_bytes;
3716 }
3717
3718 r = radeon_fence_emit(rdev, &fence, ring->idx);
3719 if (r) {
3720 radeon_ring_unlock_undo(rdev, ring);
3721 radeon_sync_free(rdev, &sync, NULL);
3722 return ERR_PTR(r);
3723 }
3724
3725 radeon_ring_unlock_commit(rdev, ring, false);
3726 radeon_sync_free(rdev, &sync, fence);
3727
3728 return fence;
3729 }
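/*
 * Hypothetical usage sketch (mirroring the ttm copy path); error
 * handling elided:
 *
 *	fence = cik_copy_cpdma(rdev, src, dst, npages, resv);
 *	if (!IS_ERR(fence))
 *		r = radeon_fence_wait(fence, false);
 */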
3730
3731 /*
3732 * IB stuff
3733 */
3734 /**
3735 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3736 *
3737 * @rdev: radeon_device pointer
3738 * @ib: radeon indirect buffer object
3739 *
3740 * Emits a DE (drawing engine) or CE (constant engine) IB
3741 * on the gfx ring. IBs are usually generated by userspace
3742 * acceleration drivers and submitted to the kernel for
3743 * scheduling on the ring. This function schedules the IB
3744 * on the gfx ring for execution by the GPU.
3745 */
3746 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3747 {
3748 struct radeon_ring *ring = &rdev->ring[ib->ring];
3749 unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3750 u32 header, control = INDIRECT_BUFFER_VALID;
3751
3752 if (ib->is_const_ib) {
3753 /* set switch buffer packet before const IB */
3754 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3755 radeon_ring_write(ring, 0);
3756
3757 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3758 } else {
3759 u32 next_rptr;
3760 if (ring->rptr_save_reg) {
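/* 3 dwords for the SET_UCONFIG_REG write below plus 4 for the
 * IB packet emitted at the end of this function. */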
3761 next_rptr = ring->wptr + 3 + 4;
3762 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3763 radeon_ring_write(ring, ((ring->rptr_save_reg -
3764 PACKET3_SET_UCONFIG_REG_START) >> 2));
3765 radeon_ring_write(ring, next_rptr);
3766 } else if (rdev->wb.enabled) {
3767 next_rptr = ring->wptr + 5 + 4;
3768 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3769 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3770 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3771 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3772 radeon_ring_write(ring, next_rptr);
3773 }
3774
3775 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3776 }
3777
3778 control |= ib->length_dw | (vm_id << 24);
3779
3780 radeon_ring_write(ring, header);
3781 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3782 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3783 radeon_ring_write(ring, control);
3784 }
3785
3786 /**
3787 * cik_ib_test - basic gfx ring IB test
3788 *
3789 * @rdev: radeon_device pointer
3790 * @ring: radeon_ring structure holding ring information
3791 *
3792 * Allocate an IB and execute it on the gfx ring (CIK).
3793 * Provides a basic gfx ring test to verify that IBs are working.
3794 * Returns 0 on success, error on failure.
3795 */
3796 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3797 {
3798 struct radeon_ib ib;
3799 uint32_t scratch;
3800 uint32_t tmp = 0;
3801 unsigned i;
3802 int r;
3803
3804 r = radeon_scratch_get(rdev, &scratch);
3805 if (r) {
3806 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3807 return r;
3808 }
3809 WREG32(scratch, 0xCAFEDEAD);
3810 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3811 if (r) {
3812 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3813 radeon_scratch_free(rdev, scratch);
3814 return r;
3815 }
3816 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3817 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3818 ib.ptr[2] = 0xDEADBEEF;
3819 ib.length_dw = 3;
3820 r = radeon_ib_schedule(rdev, &ib, NULL, false);
3821 if (r) {
3822 radeon_scratch_free(rdev, scratch);
3823 radeon_ib_free(rdev, &ib);
3824 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3825 return r;
3826 }
3827 r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3828 RADEON_USEC_IB_TEST_TIMEOUT));
3829 if (r < 0) {
3830 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3831 radeon_scratch_free(rdev, scratch);
3832 radeon_ib_free(rdev, &ib);
3833 return r;
3834 } else if (r == 0) {
3835 DRM_ERROR("radeon: fence wait timed out.\n");
3836 radeon_scratch_free(rdev, scratch);
3837 radeon_ib_free(rdev, &ib);
3838 return -ETIMEDOUT;
3839 }
3840 r = 0;
3841 for (i = 0; i < rdev->usec_timeout; i++) {
3842 tmp = RREG32(scratch);
3843 if (tmp == 0xDEADBEEF)
3844 break;
3845 udelay(1);
3846 }
3847 if (i < rdev->usec_timeout) {
3848 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3849 } else {
3850 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3851 scratch, tmp);
3852 r = -EINVAL;
3853 }
3854 radeon_scratch_free(rdev, scratch);
3855 radeon_ib_free(rdev, &ib);
3856 return r;
3857 }
3858
3859 /*
3860 * CP.
3861 * On CIK, gfx and compute now have independent command processors.
3862 *
3863 * GFX
3864 * Gfx consists of a single ring and can process both gfx jobs and
3865 * compute jobs. The gfx CP consists of three microengines (ME):
3866 * PFP - Pre-Fetch Parser
3867 * ME - Micro Engine
3868 * CE - Constant Engine
3869 * The PFP and ME make up what is considered the Drawing Engine (DE).
3870 * The CE is an asynchronous engine used for updating buffer descriptors
3871 * used by the DE so that they can be loaded into cache in parallel
3872 * while the DE is processing state update packets.
3873 *
3874 * Compute
3875 * The compute CP consists of two microengines (ME):
3876 * MEC1 - Compute MicroEngine 1
3877 * MEC2 - Compute MicroEngine 2
3878 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3879 * The queues are exposed to userspace and are programmed directly
3880 * by the compute runtime.
3881 */
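/*
 * Illustrative queue addressing (a sketch, not driver policy): with
 * 2 MECs ("me" 1 and 2), 4 pipes per MEC and 8 queues per pipe, a
 * flat queue id q in [0, 63] on KV could map as
 *	me    = 1 + q / 32;
 *	pipe  = (q / 8) % 4;
 *	queue = q % 8;
 * The kernel itself only instantiates two such queues (CP1/CP2).
 */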
3882 /**
3883 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3884 *
3885 * @rdev: radeon_device pointer
3886 * @enable: enable or disable the MEs
3887 *
3888 * Halts or unhalts the gfx MEs.
3889 */
3890 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3891 {
3892 if (enable)
3893 WREG32(CP_ME_CNTL, 0);
3894 else {
3895 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3896 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3897 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3898 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3899 }
3900 udelay(50);
3901 }
3902
3903 /**
3904 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3905 *
3906 * @rdev: radeon_device pointer
3907 *
3908 * Loads the gfx PFP, ME, and CE ucode.
3909 * Returns 0 for success, -EINVAL if the ucode is not available.
3910 */
3911 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3912 {
3913 int i;
3914
3915 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3916 return -EINVAL;
3917
3918 cik_cp_gfx_enable(rdev, false);
3919
3920 if (rdev->new_fw) {
3921 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3922 (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3923 const struct gfx_firmware_header_v1_0 *ce_hdr =
3924 (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3925 const struct gfx_firmware_header_v1_0 *me_hdr =
3926 (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3927 const __le32 *fw_data;
3928 u32 fw_size;
3929
3930 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3931 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3932 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3933
3934 /* PFP */
3935 fw_data = (const __le32 *)
3936 (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3937 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3938 WREG32(CP_PFP_UCODE_ADDR, 0);
3939 for (i = 0; i < fw_size; i++)
3940 WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3941 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3942
3943 /* CE */
3944 fw_data = (const __le32 *)
3945 (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3946 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3947 WREG32(CP_CE_UCODE_ADDR, 0);
3948 for (i = 0; i < fw_size; i++)
3949 WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3950 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3951
3952 /* ME */
3953 fw_data = (const __le32 *)
3954 (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3955 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3956 WREG32(CP_ME_RAM_WADDR, 0);
3957 for (i = 0; i < fw_size; i++)
3958 WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3959 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3960 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3961 } else {
3962 const __be32 *fw_data;
3963
3964 /* PFP */
3965 fw_data = (const __be32 *)rdev->pfp_fw->data;
3966 WREG32(CP_PFP_UCODE_ADDR, 0);
3967 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3968 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3969 WREG32(CP_PFP_UCODE_ADDR, 0);
3970
3971 /* CE */
3972 fw_data = (const __be32 *)rdev->ce_fw->data;
3973 WREG32(CP_CE_UCODE_ADDR, 0);
3974 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3975 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3976 WREG32(CP_CE_UCODE_ADDR, 0);
3977
3978 /* ME */
3979 fw_data = (const __be32 *)rdev->me_fw->data;
3980 WREG32(CP_ME_RAM_WADDR, 0);
3981 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3982 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3983 WREG32(CP_ME_RAM_WADDR, 0);
3984 }
3985
3986 return 0;
3987 }
3988
3989 /**
3990 * cik_cp_gfx_start - start the gfx ring
3991 *
3992 * @rdev: radeon_device pointer
3993 *
3994 * Enables the ring and loads the clear state context and other
3995 * packets required to init the ring.
3996 * Returns 0 for success, error for failure.
3997 */
3998 static int cik_cp_gfx_start(struct radeon_device *rdev)
3999 {
4000 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4001 int r, i;
4002
4003 /* init the CP */
4004 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4005 WREG32(CP_ENDIAN_SWAP, 0);
4006 WREG32(CP_DEVICE_ID, 1);
4007
4008 cik_cp_gfx_enable(rdev, true);
4009
4010 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4011 if (r) {
4012 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4013 return r;
4014 }
4015
4016 /* init the CE partitions. CE only used for gfx on CIK */
4017 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4018 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4019 radeon_ring_write(ring, 0x8000);
4020 radeon_ring_write(ring, 0x8000);
4021
4022 /* setup clear context state */
4023 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4024 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4025
4026 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4027 radeon_ring_write(ring, 0x80000000);
4028 radeon_ring_write(ring, 0x80000000);
4029
4030 for (i = 0; i < cik_default_size; i++)
4031 radeon_ring_write(ring, cik_default_state[i]);
4032
4033 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4034 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4035
4036 /* set clear context state */
4037 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4038 radeon_ring_write(ring, 0);
4039
4040 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4041 radeon_ring_write(ring, 0x00000316);
4042 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4043 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4044
4045 radeon_ring_unlock_commit(rdev, ring, false);
4046
4047 return 0;
4048 }
4049
4050 /**
4051 * cik_cp_gfx_fini - stop the gfx ring
4052 *
4053 * @rdev: radeon_device pointer
4054 *
4055 * Stop the gfx ring and tear down the driver ring
4056 * info.
4057 */
4058 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4059 {
4060 cik_cp_gfx_enable(rdev, false);
4061 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4062 }
4063
4064 /**
4065 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4066 *
4067 * @rdev: radeon_device pointer
4068 *
4069 * Program the location and size of the gfx ring buffer
4070 * and test it to make sure it's working.
4071 * Returns 0 for success, error for failure.
4072 */
4073 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4074 {
4075 struct radeon_ring *ring;
4076 u32 tmp;
4077 u32 rb_bufsz;
4078 u64 rb_addr;
4079 int r;
4080
4081 WREG32(CP_SEM_WAIT_TIMER, 0x0);
4082 if (rdev->family != CHIP_HAWAII)
4083 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4084
4085 /* Set the write pointer delay */
4086 WREG32(CP_RB_WPTR_DELAY, 0);
4087
4088 /* set the RB to use vmid 0 */
4089 WREG32(CP_RB_VMID, 0);
4090
4091 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4092
4093 /* ring 0 - compute and gfx */
4094 /* Set ring buffer size */
4095 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4096 rb_bufsz = order_base_2(ring->ring_size / 8);
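/* e.g. a 1 MiB ring: order_base_2(1048576 / 8) = 17 */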
4097 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4098 #ifdef __BIG_ENDIAN
4099 tmp |= BUF_SWAP_32BIT;
4100 #endif
4101 WREG32(CP_RB0_CNTL, tmp);
4102
4103 /* Initialize the ring buffer's read and write pointers */
4104 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4105 ring->wptr = 0;
4106 WREG32(CP_RB0_WPTR, ring->wptr);
4107
4108 /* set the wb address whether it's enabled or not */
4109 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4110 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4111
4112 /* scratch register shadowing is no longer supported */
4113 WREG32(SCRATCH_UMSK, 0);
4114
4115 if (!rdev->wb.enabled)
4116 tmp |= RB_NO_UPDATE;
4117
4118 mdelay(1);
4119 WREG32(CP_RB0_CNTL, tmp);
4120
4121 rb_addr = ring->gpu_addr >> 8;
4122 WREG32(CP_RB0_BASE, rb_addr);
4123 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4124
4125 /* start the ring */
4126 cik_cp_gfx_start(rdev);
4127 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4128 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4129 if (r) {
4130 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4131 return r;
4132 }
4133
4134 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4135 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4136
4137 return 0;
4138 }
4139
4140 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4141 struct radeon_ring *ring)
4142 {
4143 u32 rptr;
4144
4145 if (rdev->wb.enabled)
4146 rptr = rdev->wb.wb[ring->rptr_offs/4];
4147 else
4148 rptr = RREG32(CP_RB0_RPTR);
4149
4150 return rptr;
4151 }
4152
4153 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4154 struct radeon_ring *ring)
4155 {
4156 return RREG32(CP_RB0_WPTR);
4157 }
4158
4159 void cik_gfx_set_wptr(struct radeon_device *rdev,
4160 struct radeon_ring *ring)
4161 {
4162 WREG32(CP_RB0_WPTR, ring->wptr);
4163 (void)RREG32(CP_RB0_WPTR);
4164 }
4165
4166 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4167 struct radeon_ring *ring)
4168 {
4169 u32 rptr;
4170
4171 if (rdev->wb.enabled) {
4172 rptr = rdev->wb.wb[ring->rptr_offs/4];
4173 } else {
4174 mutex_lock(&rdev->srbm_mutex);
4175 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4176 rptr = RREG32(CP_HQD_PQ_RPTR);
4177 cik_srbm_select(rdev, 0, 0, 0, 0);
4178 mutex_unlock(&rdev->srbm_mutex);
4179 }
4180
4181 return rptr;
4182 }
4183
4184 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4185 struct radeon_ring *ring)
4186 {
4187 u32 wptr;
4188
4189 if (rdev->wb.enabled) {
4190 /* XXX check if swapping is necessary on BE */
4191 wptr = rdev->wb.wb[ring->wptr_offs/4];
4192 } else {
4193 mutex_lock(&rdev->srbm_mutex);
4194 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4195 wptr = RREG32(CP_HQD_PQ_WPTR);
4196 cik_srbm_select(rdev, 0, 0, 0, 0);
4197 mutex_unlock(&rdev->srbm_mutex);
4198 }
4199
4200 return wptr;
4201 }
4202
4203 void cik_compute_set_wptr(struct radeon_device *rdev,
4204 struct radeon_ring *ring)
4205 {
4206 /* XXX check if swapping is necessary on BE */
4207 rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
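/* Ring the doorbell so the MEC fetches the updated wptr. */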
4208 WDOORBELL32(ring->doorbell_index, ring->wptr);
4209 }
4210
4211 static void cik_compute_stop(struct radeon_device *rdev,
4212 struct radeon_ring *ring)
4213 {
4214 u32 j, tmp;
4215
4216 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4217 /* Disable wptr polling. */
4218 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4219 tmp &= ~WPTR_POLL_EN;
4220 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4221 /* Disable HQD. */
4222 if (RREG32(CP_HQD_ACTIVE) & 1) {
4223 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4224 for (j = 0; j < rdev->usec_timeout; j++) {
4225 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4226 break;
4227 udelay(1);
4228 }
4229 WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4230 WREG32(CP_HQD_PQ_RPTR, 0);
4231 WREG32(CP_HQD_PQ_WPTR, 0);
4232 }
4233 cik_srbm_select(rdev, 0, 0, 0, 0);
4234 }
4235
4236 /**
4237 * cik_cp_compute_enable - enable/disable the compute CP MEs
4238 *
4239 * @rdev: radeon_device pointer
4240 * @enable: enable or disable the MEs
4241 *
4242 * Halts or unhalts the compute MEs.
4243 */
4244 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4245 {
4246 if (enable)
4247 WREG32(CP_MEC_CNTL, 0);
4248 else {
4249 /*
4250 * To make hibernation reliable we need to clear compute ring
4251 * configuration before halting the compute ring.
4252 */
4253 mutex_lock(&rdev->srbm_mutex);
4254 cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4255 cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4256 mutex_unlock(&rdev->srbm_mutex);
4257
4258 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4259 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4260 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4261 }
4262 udelay(50);
4263 }
4264
4265 /**
4266 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4267 *
4268 * @rdev: radeon_device pointer
4269 *
4270 * Loads the compute MEC1&2 ucode.
4271 * Returns 0 for success, -EINVAL if the ucode is not available.
4272 */
4273 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4274 {
4275 int i;
4276
4277 if (!rdev->mec_fw)
4278 return -EINVAL;
4279
4280 cik_cp_compute_enable(rdev, false);
4281
4282 if (rdev->new_fw) {
4283 const struct gfx_firmware_header_v1_0 *mec_hdr =
4284 (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4285 const __le32 *fw_data;
4286 u32 fw_size;
4287
4288 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4289
4290 /* MEC1 */
4291 fw_data = (const __le32 *)
4292 (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4293 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4294 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4295 for (i = 0; i < fw_size; i++)
4296 WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4297 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4298
4299 /* MEC2 */
4300 if (rdev->family == CHIP_KAVERI) {
4301 const struct gfx_firmware_header_v1_0 *mec2_hdr =
4302 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4303
4304 fw_data = (const __le32 *)
4305 (rdev->mec2_fw->data +
4306 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4307 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4308 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4309 for (i = 0; i < fw_size; i++)
4310 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4311 WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4312 }
4313 } else {
4314 const __be32 *fw_data;
4315
4316 /* MEC1 */
4317 fw_data = (const __be32 *)rdev->mec_fw->data;
4318 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4319 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4320 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4321 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4322
4323 if (rdev->family == CHIP_KAVERI) {
4324 /* MEC2 */
4325 fw_data = (const __be32 *)rdev->mec_fw->data;
4326 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4327 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4328 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4329 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4330 }
4331 }
4332
4333 return 0;
4334 }
4335
4336 /**
4337 * cik_cp_compute_start - start the compute queues
4338 *
4339 * @rdev: radeon_device pointer
4340 *
4341 * Enable the compute queues.
4342 * Returns 0 for success, error for failure.
4343 */
4344 static int cik_cp_compute_start(struct radeon_device *rdev)
4345 {
4346 cik_cp_compute_enable(rdev, true);
4347
4348 return 0;
4349 }
4350
4351 /**
4352 * cik_cp_compute_fini - stop the compute queues
4353 *
4354 * @rdev: radeon_device pointer
4355 *
4356 * Stop the compute queues and tear down the driver queue
4357 * info.
4358 */
4359 static void cik_cp_compute_fini(struct radeon_device *rdev)
4360 {
4361 int i, idx, r;
4362
4363 cik_cp_compute_enable(rdev, false);
4364
4365 for (i = 0; i < 2; i++) {
4366 if (i == 0)
4367 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4368 else
4369 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4370
4371 if (rdev->ring[idx].mqd_obj) {
4372 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4373 if (unlikely(r != 0))
4374 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4375
4376 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4377 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4378
4379 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4380 rdev->ring[idx].mqd_obj = NULL;
4381 }
4382 }
4383 }
4384
4385 static void cik_mec_fini(struct radeon_device *rdev)
4386 {
4387 int r;
4388
4389 if (rdev->mec.hpd_eop_obj) {
4390 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4391 if (unlikely(r != 0))
4392 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4393 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4394 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4395
4396 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4397 rdev->mec.hpd_eop_obj = NULL;
4398 }
4399 }
4400
4401 #define MEC_HPD_SIZE 2048
4402
4403 static int cik_mec_init(struct radeon_device *rdev)
4404 {
4405 int r;
4406 u32 *hpd;
4407
4408 /*
4409 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4410 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4411 */
4412 if (rdev->family == CHIP_KAVERI)
4413 rdev->mec.num_mec = 2;
4414 else
4415 rdev->mec.num_mec = 1;
4416 rdev->mec.num_pipe = 4;
4417 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4418
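/* One MEC_HPD_SIZE * 2 EOP region per pipe; on KV the allocation
 * below is 2 * 4 * 2048 * 2 = 32 KiB. */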
4419 if (rdev->mec.hpd_eop_obj == NULL) {
4420 r = radeon_bo_create(rdev,
4421 rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4422 PAGE_SIZE, true,
4423 RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4424 &rdev->mec.hpd_eop_obj);
4425 if (r) {
4426 dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4427 return r;
4428 }
4429 }
4430
4431 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4432 if (unlikely(r != 0)) {
4433 cik_mec_fini(rdev);
4434 return r;
4435 }
4436 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4437 &rdev->mec.hpd_eop_gpu_addr);
4438 if (r) {
4439 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4440 cik_mec_fini(rdev);
4441 return r;
4442 }
4443 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4444 if (r) {
4445 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4446 cik_mec_fini(rdev);
4447 return r;
4448 }
4449
4450 /* clear memory. Not sure if this is required or not */
4451 memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4452
4453 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4454 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4455
4456 return 0;
4457 }
4458
4459 struct hqd_registers
4460 {
4461 u32 cp_mqd_base_addr;
4462 u32 cp_mqd_base_addr_hi;
4463 u32 cp_hqd_active;
4464 u32 cp_hqd_vmid;
4465 u32 cp_hqd_persistent_state;
4466 u32 cp_hqd_pipe_priority;
4467 u32 cp_hqd_queue_priority;
4468 u32 cp_hqd_quantum;
4469 u32 cp_hqd_pq_base;
4470 u32 cp_hqd_pq_base_hi;
4471 u32 cp_hqd_pq_rptr;
4472 u32 cp_hqd_pq_rptr_report_addr;
4473 u32 cp_hqd_pq_rptr_report_addr_hi;
4474 u32 cp_hqd_pq_wptr_poll_addr;
4475 u32 cp_hqd_pq_wptr_poll_addr_hi;
4476 u32 cp_hqd_pq_doorbell_control;
4477 u32 cp_hqd_pq_wptr;
4478 u32 cp_hqd_pq_control;
4479 u32 cp_hqd_ib_base_addr;
4480 u32 cp_hqd_ib_base_addr_hi;
4481 u32 cp_hqd_ib_rptr;
4482 u32 cp_hqd_ib_control;
4483 u32 cp_hqd_iq_timer;
4484 u32 cp_hqd_iq_rptr;
4485 u32 cp_hqd_dequeue_request;
4486 u32 cp_hqd_dma_offload;
4487 u32 cp_hqd_sema_cmd;
4488 u32 cp_hqd_msg_type;
4489 u32 cp_hqd_atomic0_preop_lo;
4490 u32 cp_hqd_atomic0_preop_hi;
4491 u32 cp_hqd_atomic1_preop_lo;
4492 u32 cp_hqd_atomic1_preop_hi;
4493 u32 cp_hqd_hq_scheduler0;
4494 u32 cp_hqd_hq_scheduler1;
4495 u32 cp_mqd_control;
4496 };
4497
4498 struct bonaire_mqd
4499 {
4500 u32 header;
4501 u32 dispatch_initiator;
4502 u32 dimensions[3];
4503 u32 start_idx[3];
4504 u32 num_threads[3];
4505 u32 pipeline_stat_enable;
4506 u32 perf_counter_enable;
4507 u32 pgm[2];
4508 u32 tba[2];
4509 u32 tma[2];
4510 u32 pgm_rsrc[2];
4511 u32 vmid;
4512 u32 resource_limits;
4513 u32 static_thread_mgmt01[2];
4514 u32 tmp_ring_size;
4515 u32 static_thread_mgmt23[2];
4516 u32 restart[3];
4517 u32 thread_trace_enable;
4518 u32 reserved1;
4519 u32 user_data[16];
4520 u32 vgtcs_invoke_count[2];
4521 struct hqd_registers queue_state;
4522 u32 dequeue_cntr;
4523 u32 interrupt_queue[64];
4524 };
4525
4526 /**
4527 * cik_cp_compute_resume - setup the compute queue registers
4528 *
4529 * @rdev: radeon_device pointer
4530 *
4531 * Program the compute queues and test them to make sure they
4532 * are working.
4533 * Returns 0 for success, error for failure.
4534 */
4535 static int cik_cp_compute_resume(struct radeon_device *rdev)
4536 {
4537 int r, i, j, idx;
4538 u32 tmp;
4539 bool use_doorbell = true;
4540 u64 hqd_gpu_addr;
4541 u64 mqd_gpu_addr;
4542 u64 eop_gpu_addr;
4543 u64 wb_gpu_addr;
4544 u32 *buf;
4545 struct bonaire_mqd *mqd;
4546
4547 r = cik_cp_compute_start(rdev);
4548 if (r)
4549 return r;
4550
4551 /* fix up chicken bits */
4552 tmp = RREG32(CP_CPF_DEBUG);
4553 tmp |= (1 << 23);
4554 WREG32(CP_CPF_DEBUG, tmp);
4555
4556 /* init the pipes */
4557 mutex_lock(&rdev->srbm_mutex);
4558
4559 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4560 int me = (i < 4) ? 1 : 2;
4561 int pipe = (i < 4) ? i : (i - 4);
4562
4563 cik_srbm_select(rdev, me, pipe, 0, 0);
4564
4565 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4566 /* write the EOP addr */
4567 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4568 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4569
4570 /* set the VMID assigned */
4571 WREG32(CP_HPD_EOP_VMID, 0);
4572
4573 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
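/* order_base_2(MEC_HPD_SIZE / 8) = order_base_2(256) = 8, i.e.
 * 2^(8+1) = 512 dwords = exactly MEC_HPD_SIZE bytes. */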
4574 tmp = RREG32(CP_HPD_EOP_CONTROL);
4575 tmp &= ~EOP_SIZE_MASK;
4576 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4577 WREG32(CP_HPD_EOP_CONTROL, tmp);
4578
4579 }
4580 cik_srbm_select(rdev, 0, 0, 0, 0);
4581 mutex_unlock(&rdev->srbm_mutex);
4582
4583 /* init the queues. Just two for now. */
4584 for (i = 0; i < 2; i++) {
4585 if (i == 0)
4586 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4587 else
4588 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4589
4590 if (rdev->ring[idx].mqd_obj == NULL) {
4591 r = radeon_bo_create(rdev,
4592 sizeof(struct bonaire_mqd),
4593 PAGE_SIZE, true,
4594 RADEON_GEM_DOMAIN_GTT, 0, NULL,
4595 NULL, &rdev->ring[idx].mqd_obj);
4596 if (r) {
4597 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4598 return r;
4599 }
4600 }
4601
4602 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4603 if (unlikely(r != 0)) {
4604 cik_cp_compute_fini(rdev);
4605 return r;
4606 }
4607 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4608 &mqd_gpu_addr);
4609 if (r) {
4610 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4611 cik_cp_compute_fini(rdev);
4612 return r;
4613 }
4614 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4615 if (r) {
4616 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4617 cik_cp_compute_fini(rdev);
4618 return r;
4619 }
4620
4621 /* init the mqd struct */
4622 memset(buf, 0, sizeof(struct bonaire_mqd));
4623
4624 mqd = (struct bonaire_mqd *)buf;
4625 mqd->header = 0xC0310800;
4626 mqd->static_thread_mgmt01[0] = 0xffffffff;
4627 mqd->static_thread_mgmt01[1] = 0xffffffff;
4628 mqd->static_thread_mgmt23[0] = 0xffffffff;
4629 mqd->static_thread_mgmt23[1] = 0xffffffff;
4630
4631 mutex_lock(&rdev->srbm_mutex);
4632 cik_srbm_select(rdev, rdev->ring[idx].me,
4633 rdev->ring[idx].pipe,
4634 rdev->ring[idx].queue, 0);
4635
4636 /* disable wptr polling */
4637 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4638 tmp &= ~WPTR_POLL_EN;
4639 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4640
4641 /* enable doorbell? */
4642 mqd->queue_state.cp_hqd_pq_doorbell_control =
4643 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4644 if (use_doorbell)
4645 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4646 else
4647 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4648 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4649 mqd->queue_state.cp_hqd_pq_doorbell_control);
4650
4651 /* disable the queue if it's active */
4652 mqd->queue_state.cp_hqd_dequeue_request = 0;
4653 mqd->queue_state.cp_hqd_pq_rptr = 0;
4654 mqd->queue_state.cp_hqd_pq_wptr = 0;
4655 if (RREG32(CP_HQD_ACTIVE) & 1) {
4656 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4657 for (j = 0; j < rdev->usec_timeout; j++) {
4658 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4659 break;
4660 udelay(1);
4661 }
4662 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4663 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4664 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4665 }
4666
4667 /* set the pointer to the MQD */
4668 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4669 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4670 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4671 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4672 /* set MQD vmid to 0 */
4673 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4674 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4675 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4676
4677 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4678 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4679 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4680 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4681 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4682 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4683
4684 /* set up the HQD, this is similar to CP_RB0_CNTL */
4685 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4686 mqd->queue_state.cp_hqd_pq_control &=
4687 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4688
4689 mqd->queue_state.cp_hqd_pq_control |=
4690 order_base_2(rdev->ring[idx].ring_size / 8);
4691 mqd->queue_state.cp_hqd_pq_control |=
4692 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4693 #ifdef __BIG_ENDIAN
4694 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4695 #endif
4696 mqd->queue_state.cp_hqd_pq_control &=
4697 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4698 mqd->queue_state.cp_hqd_pq_control |=
4699 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4700 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4701
4702 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4703 if (i == 0)
4704 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4705 else
4706 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4707 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4708 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4709 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4710 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4711 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4712
4713 /* set the wb address whether it's enabled or not */
4714 if (i == 0)
4715 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4716 else
4717 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4718 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4719 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4720 upper_32_bits(wb_gpu_addr) & 0xffff;
4721 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4722 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4723 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4724 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4725
4726 /* enable the doorbell if requested */
4727 if (use_doorbell) {
4728 mqd->queue_state.cp_hqd_pq_doorbell_control =
4729 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4730 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4731 mqd->queue_state.cp_hqd_pq_doorbell_control |=
4732 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4733 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4734 mqd->queue_state.cp_hqd_pq_doorbell_control &=
4735 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4736
4737 } else {
4738 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4739 }
4740 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4741 mqd->queue_state.cp_hqd_pq_doorbell_control);
4742
4743 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4744 rdev->ring[idx].wptr = 0;
4745 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4746 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4747 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4748
4749 /* set the vmid for the queue */
4750 mqd->queue_state.cp_hqd_vmid = 0;
4751 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4752
4753 /* activate the queue */
4754 mqd->queue_state.cp_hqd_active = 1;
4755 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4756
4757 cik_srbm_select(rdev, 0, 0, 0, 0);
4758 mutex_unlock(&rdev->srbm_mutex);
4759
4760 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4761 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4762
4763 rdev->ring[idx].ready = true;
4764 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4765 if (r)
4766 rdev->ring[idx].ready = false;
4767 }
4768
4769 return 0;
4770 }
4771
4772 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4773 {
4774 cik_cp_gfx_enable(rdev, enable);
4775 cik_cp_compute_enable(rdev, enable);
4776 }
4777
4778 static int cik_cp_load_microcode(struct radeon_device *rdev)
4779 {
4780 int r;
4781
4782 r = cik_cp_gfx_load_microcode(rdev);
4783 if (r)
4784 return r;
4785 r = cik_cp_compute_load_microcode(rdev);
4786 if (r)
4787 return r;
4788
4789 return 0;
4790 }
4791
4792 static void cik_cp_fini(struct radeon_device *rdev)
4793 {
4794 cik_cp_gfx_fini(rdev);
4795 cik_cp_compute_fini(rdev);
4796 }
4797
4798 static int cik_cp_resume(struct radeon_device *rdev)
4799 {
4800 int r;
4801
4802 cik_enable_gui_idle_interrupt(rdev, false);
4803
4804 r = cik_cp_load_microcode(rdev);
4805 if (r)
4806 return r;
4807
4808 r = cik_cp_gfx_resume(rdev);
4809 if (r)
4810 return r;
4811 r = cik_cp_compute_resume(rdev);
4812 if (r)
4813 return r;
4814
4815 cik_enable_gui_idle_interrupt(rdev, true);
4816
4817 return 0;
4818 }
4819
4820 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4821 {
4822 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
4823 RREG32(GRBM_STATUS));
4824 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
4825 RREG32(GRBM_STATUS2));
4826 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
4827 RREG32(GRBM_STATUS_SE0));
4828 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
4829 RREG32(GRBM_STATUS_SE1));
4830 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
4831 RREG32(GRBM_STATUS_SE2));
4832 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
4833 RREG32(GRBM_STATUS_SE3));
4834 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
4835 RREG32(SRBM_STATUS));
4836 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
4837 RREG32(SRBM_STATUS2));
4838 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4839 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4840 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4841 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4842 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4843 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4844 RREG32(CP_STALLED_STAT1));
4845 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4846 RREG32(CP_STALLED_STAT2));
4847 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4848 RREG32(CP_STALLED_STAT3));
4849 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4850 RREG32(CP_CPF_BUSY_STAT));
4851 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4852 RREG32(CP_CPF_STALLED_STAT1));
4853 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4854 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4855 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4856 RREG32(CP_CPC_STALLED_STAT1));
4857 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4858 }
4859
4860 /**
4861 * cik_gpu_check_soft_reset - check which blocks are busy
4862 *
4863 * @rdev: radeon_device pointer
4864 *
4865 * Check which blocks are busy and return the relevant reset
4866 * mask to be used by cik_gpu_soft_reset().
4867 * Returns a mask of the blocks to be reset.
4868 */
4869 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4870 {
4871 u32 reset_mask = 0;
4872 u32 tmp;
4873
4874 /* GRBM_STATUS */
4875 tmp = RREG32(GRBM_STATUS);
4876 if (tmp & (PA_BUSY | SC_BUSY |
4877 BCI_BUSY | SX_BUSY |
4878 TA_BUSY | VGT_BUSY |
4879 DB_BUSY | CB_BUSY |
4880 GDS_BUSY | SPI_BUSY |
4881 IA_BUSY | IA_BUSY_NO_DMA))
4882 reset_mask |= RADEON_RESET_GFX;
4883
4884 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4885 reset_mask |= RADEON_RESET_CP;
4886
4887 /* GRBM_STATUS2 */
4888 tmp = RREG32(GRBM_STATUS2);
4889 if (tmp & RLC_BUSY)
4890 reset_mask |= RADEON_RESET_RLC;
4891
4892 /* SDMA0_STATUS_REG */
4893 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4894 if (!(tmp & SDMA_IDLE))
4895 reset_mask |= RADEON_RESET_DMA;
4896
4897 /* SDMA1_STATUS_REG */
4898 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4899 if (!(tmp & SDMA_IDLE))
4900 reset_mask |= RADEON_RESET_DMA1;
4901
4902 /* SRBM_STATUS2 */
4903 tmp = RREG32(SRBM_STATUS2);
4904 if (tmp & SDMA_BUSY)
4905 reset_mask |= RADEON_RESET_DMA;
4906
4907 if (tmp & SDMA1_BUSY)
4908 reset_mask |= RADEON_RESET_DMA1;
4909
4910 /* SRBM_STATUS */
4911 tmp = RREG32(SRBM_STATUS);
4912
4913 if (tmp & IH_BUSY)
4914 reset_mask |= RADEON_RESET_IH;
4915
4916 if (tmp & SEM_BUSY)
4917 reset_mask |= RADEON_RESET_SEM;
4918
4919 if (tmp & GRBM_RQ_PENDING)
4920 reset_mask |= RADEON_RESET_GRBM;
4921
4922 if (tmp & VMC_BUSY)
4923 reset_mask |= RADEON_RESET_VMC;
4924
4925 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4926 MCC_BUSY | MCD_BUSY))
4927 reset_mask |= RADEON_RESET_MC;
4928
4929 if (evergreen_is_display_hung(rdev))
4930 reset_mask |= RADEON_RESET_DISPLAY;
4931
4932 /* Skip MC reset as it's most likely not hung, just busy */
4933 if (reset_mask & RADEON_RESET_MC) {
4934 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4935 reset_mask &= ~RADEON_RESET_MC;
4936 }
4937
4938 return reset_mask;
4939 }
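/*
 * Typical flow (a sketch; the asic reset callback drives this):
 *
 *	u32 mask = cik_gpu_check_soft_reset(rdev);
 *	if (mask)
 *		cik_gpu_soft_reset(rdev, mask);
 */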
4940
4941 /**
4942 * cik_gpu_soft_reset - soft reset GPU
4943 *
4944 * @rdev: radeon_device pointer
4945 * @reset_mask: mask of which blocks to reset
4946 *
4947 * Soft reset the blocks specified in @reset_mask.
4948 */
4949 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4950 {
4951 struct evergreen_mc_save save;
4952 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4953 u32 tmp;
4954
4955 if (reset_mask == 0)
4956 return;
4957
4958 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4959
4960 cik_print_gpu_status_regs(rdev);
4961 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4962 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4963 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4964 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4965
4966 /* disable CG/PG */
4967 cik_fini_pg(rdev);
4968 cik_fini_cg(rdev);
4969
4970 /* stop the rlc */
4971 cik_rlc_stop(rdev);
4972
4973 /* Disable GFX parsing/prefetching */
4974 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4975
4976 /* Disable MEC parsing/prefetching */
4977 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4978
4979 if (reset_mask & RADEON_RESET_DMA) {
4980 /* sdma0 */
4981 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4982 tmp |= SDMA_HALT;
4983 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4984 }
4985 if (reset_mask & RADEON_RESET_DMA1) {
4986 /* sdma1 */
4987 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4988 tmp |= SDMA_HALT;
4989 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4990 }
4991
4992 evergreen_mc_stop(rdev, &save);
4993 if (evergreen_mc_wait_for_idle(rdev)) {
4994 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4995 }
4996
4997 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4998 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4999
5000 if (reset_mask & RADEON_RESET_CP) {
5001 grbm_soft_reset |= SOFT_RESET_CP;
5002
5003 srbm_soft_reset |= SOFT_RESET_GRBM;
5004 }
5005
5006 if (reset_mask & RADEON_RESET_DMA)
5007 srbm_soft_reset |= SOFT_RESET_SDMA;
5008
5009 if (reset_mask & RADEON_RESET_DMA1)
5010 srbm_soft_reset |= SOFT_RESET_SDMA1;
5011
5012 if (reset_mask & RADEON_RESET_DISPLAY)
5013 srbm_soft_reset |= SOFT_RESET_DC;
5014
5015 if (reset_mask & RADEON_RESET_RLC)
5016 grbm_soft_reset |= SOFT_RESET_RLC;
5017
5018 if (reset_mask & RADEON_RESET_SEM)
5019 srbm_soft_reset |= SOFT_RESET_SEM;
5020
5021 if (reset_mask & RADEON_RESET_IH)
5022 srbm_soft_reset |= SOFT_RESET_IH;
5023
5024 if (reset_mask & RADEON_RESET_GRBM)
5025 srbm_soft_reset |= SOFT_RESET_GRBM;
5026
5027 if (reset_mask & RADEON_RESET_VMC)
5028 srbm_soft_reset |= SOFT_RESET_VMC;
5029
5030 if (!(rdev->flags & RADEON_IS_IGP)) {
5031 if (reset_mask & RADEON_RESET_MC)
5032 srbm_soft_reset |= SOFT_RESET_MC;
5033 }
5034
5035 if (grbm_soft_reset) {
5036 tmp = RREG32(GRBM_SOFT_RESET);
5037 tmp |= grbm_soft_reset;
5038 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5039 WREG32(GRBM_SOFT_RESET, tmp);
5040 tmp = RREG32(GRBM_SOFT_RESET);
5041
5042 udelay(50);
5043
5044 tmp &= ~grbm_soft_reset;
5045 WREG32(GRBM_SOFT_RESET, tmp);
5046 tmp = RREG32(GRBM_SOFT_RESET);
5047 }
5048
5049 if (srbm_soft_reset) {
5050 tmp = RREG32(SRBM_SOFT_RESET);
5051 tmp |= srbm_soft_reset;
5052 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5053 WREG32(SRBM_SOFT_RESET, tmp);
5054 tmp = RREG32(SRBM_SOFT_RESET);
5055
5056 udelay(50);
5057
5058 tmp &= ~srbm_soft_reset;
5059 WREG32(SRBM_SOFT_RESET, tmp);
5060 tmp = RREG32(SRBM_SOFT_RESET);
5061 }
5062
5063 /* Wait a little for things to settle down */
5064 udelay(50);
5065
5066 evergreen_mc_resume(rdev, &save);
5067 udelay(50);
5068
5069 cik_print_gpu_status_regs(rdev);
5070 }
5071
5072 struct kv_reset_save_regs {
5073 u32 gmcon_reng_execute;
5074 u32 gmcon_misc;
5075 u32 gmcon_misc3;
5076 };
5077
5078 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5079 struct kv_reset_save_regs *save)
5080 {
5081 save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5082 save->gmcon_misc = RREG32(GMCON_MISC);
5083 save->gmcon_misc3 = RREG32(GMCON_MISC3);
5084
5085 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5086 WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5087 STCTRL_STUTTER_EN));
5088 }
5089
5090 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5091 struct kv_reset_save_regs *save)
5092 {
5093 int i;
5094
5095 WREG32(GMCON_PGFSM_WRITE, 0);
5096 WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5097
5098 for (i = 0; i < 5; i++)
5099 WREG32(GMCON_PGFSM_WRITE, 0);
5100
5101 WREG32(GMCON_PGFSM_WRITE, 0);
5102 WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5103
5104 for (i = 0; i < 5; i++)
5105 WREG32(GMCON_PGFSM_WRITE, 0);
5106
5107 WREG32(GMCON_PGFSM_WRITE, 0x210000);
5108 WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5109
5110 for (i = 0; i < 5; i++)
5111 WREG32(GMCON_PGFSM_WRITE, 0);
5112
5113 WREG32(GMCON_PGFSM_WRITE, 0x21003);
5114 WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5115
5116 for (i = 0; i < 5; i++)
5117 WREG32(GMCON_PGFSM_WRITE, 0);
5118
5119 WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5120 WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5121
5122 for (i = 0; i < 5; i++)
5123 WREG32(GMCON_PGFSM_WRITE, 0);
5124
5125 WREG32(GMCON_PGFSM_WRITE, 0);
5126 WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5127
5128 for (i = 0; i < 5; i++)
5129 WREG32(GMCON_PGFSM_WRITE, 0);
5130
5131 WREG32(GMCON_PGFSM_WRITE, 0x420000);
5132 WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5133
5134 for (i = 0; i < 5; i++)
5135 WREG32(GMCON_PGFSM_WRITE, 0);
5136
5137 WREG32(GMCON_PGFSM_WRITE, 0x120202);
5138 WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5139
5140 for (i = 0; i < 5; i++)
5141 WREG32(GMCON_PGFSM_WRITE, 0);
5142
5143 WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5144 WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5145
5146 for (i = 0; i < 5; i++)
5147 WREG32(GMCON_PGFSM_WRITE, 0);
5148
5149 WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5150 WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5151
5152 for (i = 0; i < 5; i++)
5153 WREG32(GMCON_PGFSM_WRITE, 0);
5154
5155 WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5156 WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5157
5158 WREG32(GMCON_MISC3, save->gmcon_misc3);
5159 WREG32(GMCON_MISC, save->gmcon_misc);
5160 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5161 }
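/*
 * Illustrative sketch, not compiled: every restore step above is the
 * same three-part sequence, distilled here into a hypothetical helper
 * (example_pgfsm_step() is not part of the driver).
 */
#if 0
static void example_pgfsm_step(struct radeon_device *rdev, u32 data, u32 config)
{
	int i;

	WREG32(GMCON_PGFSM_WRITE, data);	/* value to load */
	WREG32(GMCON_PGFSM_CONFIG, config);	/* select the target PGFSM */
	for (i = 0; i < 5; i++)			/* pad writes between steps */
		WREG32(GMCON_PGFSM_WRITE, 0);
}
#endif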
5162
5163 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5164 {
5165 struct evergreen_mc_save save;
5166 struct kv_reset_save_regs kv_save = { 0 };
5167 u32 tmp, i;
5168
5169 dev_info(rdev->dev, "GPU pci config reset\n");
5170
5171 /* disable dpm? */
5172
5173 /* disable cg/pg */
5174 cik_fini_pg(rdev);
5175 cik_fini_cg(rdev);
5176
5177 /* Disable GFX parsing/prefetching */
5178 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5179
5180 /* Disable MEC parsing/prefetching */
5181 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5182
5183 /* sdma0 */
5184 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5185 tmp |= SDMA_HALT;
5186 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5187 /* sdma1 */
5188 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5189 tmp |= SDMA_HALT;
5190 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5191 /* XXX other engines? */
5192
5193 /* halt the rlc, disable cp internal ints */
5194 cik_rlc_stop(rdev);
5195
5196 udelay(50);
5197
5198 /* disable mem access */
5199 evergreen_mc_stop(rdev, &save);
5200 if (evergreen_mc_wait_for_idle(rdev)) {
5201 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5202 }
5203
5204 if (rdev->flags & RADEON_IS_IGP)
5205 kv_save_regs_for_reset(rdev, &kv_save);
5206
5207 /* disable BM */
5208 pci_clear_master(rdev->pdev);
5209 /* reset */
5210 radeon_pci_config_reset(rdev);
5211
5212 udelay(100);
5213
5214 /* wait for asic to come out of reset */
5215 for (i = 0; i < rdev->usec_timeout; i++) {
5216 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5217 break;
5218 udelay(1);
5219 }
5220
5221 /* does asic init need to be run first??? */
5222 if (rdev->flags & RADEON_IS_IGP)
5223 kv_restore_regs_for_reset(rdev, &kv_save);
5224 }
5225
5226 /**
5227 * cik_asic_reset - soft reset GPU
5228 *
5229 * @rdev: radeon_device pointer
5230 * @hard: force hard reset
5231 *
5232 * Look up which blocks are hung and attempt
5233 * to reset them.
5234 * Returns 0 for success.
5235 */
5236 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5237 {
5238 u32 reset_mask;
5239
5240 if (hard) {
5241 cik_gpu_pci_config_reset(rdev);
5242 return 0;
5243 }
5244
5245 reset_mask = cik_gpu_check_soft_reset(rdev);
5246
5247 if (reset_mask)
5248 r600_set_bios_scratch_engine_hung(rdev, true);
5249
5250 /* try soft reset */
5251 cik_gpu_soft_reset(rdev, reset_mask);
5252
5253 reset_mask = cik_gpu_check_soft_reset(rdev);
5254
5255 /* try pci config reset */
5256 if (reset_mask && radeon_hard_reset)
5257 cik_gpu_pci_config_reset(rdev);
5258
5259 reset_mask = cik_gpu_check_soft_reset(rdev);
5260
5261 if (!reset_mask)
5262 r600_set_bios_scratch_engine_hung(rdev, false);
5263
5264 return 0;
5265 }
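/*
 * Illustrative sketch, not compiled: the two entry modes of
 * cik_asic_reset(). The soft path checks which blocks are hung and
 * escalates to a PCI config reset on its own when blocks stay hung and
 * the radeon_hard_reset option allows it.
 */
#if 0
static void example_reset_paths(struct radeon_device *rdev)
{
	cik_asic_reset(rdev, false);	/* soft reset, may escalate */
	cik_asic_reset(rdev, true);	/* force a PCI config reset */
}
#endif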
5266
5267 /**
5268 * cik_gfx_is_lockup - check if the 3D engine is locked up
5269 *
5270 * @rdev: radeon_device pointer
5271 * @ring: radeon_ring structure holding ring information
5272 *
5273 * Check if the 3D engine is locked up (CIK).
5274 * Returns true if the engine is locked, false if not.
5275 */
5276 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5277 {
5278 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5279
5280 if (!(reset_mask & (RADEON_RESET_GFX |
5281 RADEON_RESET_COMPUTE |
5282 RADEON_RESET_CP))) {
5283 radeon_ring_lockup_update(rdev, ring);
5284 return false;
5285 }
5286 return radeon_ring_test_lockup(rdev, ring);
5287 }
5288
5289 /* MC */
5290 /**
5291 * cik_mc_program - program the GPU memory controller
5292 *
5293 * @rdev: radeon_device pointer
5294 *
5295 * Set the location of vram, gart, and AGP in the GPU's
5296 * physical address space (CIK).
5297 */
5298 static void cik_mc_program(struct radeon_device *rdev)
5299 {
5300 struct evergreen_mc_save save;
5301 u32 tmp;
5302 int i, j;
5303
5304 /* Initialize HDP */
5305 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5306 WREG32((0x2c14 + j), 0x00000000);
5307 WREG32((0x2c18 + j), 0x00000000);
5308 WREG32((0x2c1c + j), 0x00000000);
5309 WREG32((0x2c20 + j), 0x00000000);
5310 WREG32((0x2c24 + j), 0x00000000);
5311 }
5312 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5313
5314 evergreen_mc_stop(rdev, &save);
5315 if (radeon_mc_wait_for_idle(rdev)) {
5316 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5317 }
5318 /* Lockout access through VGA aperture*/
5319 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5320 /* Update configuration */
5321 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5322 rdev->mc.vram_start >> 12);
5323 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5324 rdev->mc.vram_end >> 12);
5325 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5326 rdev->vram_scratch.gpu_addr >> 12);
5327 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5328 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5329 WREG32(MC_VM_FB_LOCATION, tmp);
5330 /* XXX double check these! */
5331 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5332 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5333 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5334 WREG32(MC_VM_AGP_BASE, 0);
5335 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5336 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5337 if (radeon_mc_wait_for_idle(rdev)) {
5338 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5339 }
5340 evergreen_mc_resume(rdev, &save);
5341 /* we need to own VRAM, so turn off the VGA renderer here
5342 * to stop it from overwriting our objects */
5343 rv515_vga_render_disable(rdev);
5344 }
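/*
 * Illustrative sketch, not compiled: MC_VM_FB_LOCATION above packs the
 * framebuffer range as two 16-bit fields in 16 MB (1 << 24) units --
 * the top half holds the end, the bottom half the start. The sample
 * addresses here are made up.
 */
#if 0
static u32 example_fb_location(u64 vram_start, u64 vram_end)
{
	u32 tmp = (u32)(((vram_end >> 24) & 0xFFFF) << 16);

	tmp |= (u32)((vram_start >> 24) & 0xFFFF);
	/* e.g. start 0xF400000000, end 0xF4FFFFFFFF -> 0xF4FFF400 */
	return tmp;
}
#endif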
5345
5346 /**
5347 * cik_mc_init - initialize the memory controller driver params
5348 *
5349 * @rdev: radeon_device pointer
5350 *
5351 * Look up the amount of vram, vram width, and decide how to place
5352 * vram and gart within the GPU's physical address space (CIK).
5353 * Returns 0 for success.
5354 */
5355 static int cik_mc_init(struct radeon_device *rdev)
5356 {
5357 u32 tmp;
5358 int chansize, numchan;
5359
5360 /* Get VRAM information */
5361 rdev->mc.vram_is_ddr = true;
5362 tmp = RREG32(MC_ARB_RAMCFG);
5363 if (tmp & CHANSIZE_MASK) {
5364 chansize = 64;
5365 } else {
5366 chansize = 32;
5367 }
5368 tmp = RREG32(MC_SHARED_CHMAP);
5369 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5370 case 0:
5371 default:
5372 numchan = 1;
5373 break;
5374 case 1:
5375 numchan = 2;
5376 break;
5377 case 2:
5378 numchan = 4;
5379 break;
5380 case 3:
5381 numchan = 8;
5382 break;
5383 case 4:
5384 numchan = 3;
5385 break;
5386 case 5:
5387 numchan = 6;
5388 break;
5389 case 6:
5390 numchan = 10;
5391 break;
5392 case 7:
5393 numchan = 12;
5394 break;
5395 case 8:
5396 numchan = 16;
5397 break;
5398 }
5399 rdev->mc.vram_width = numchan * chansize;
5400 /* Could the aperture size report 0? */
5401 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5402 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5403 /* size in MB on CIK */
5404 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5405 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5406 rdev->mc.visible_vram_size = rdev->mc.aper_size;
5407 si_vram_gtt_location(rdev, &rdev->mc);
5408 radeon_update_bandwidth_info(rdev);
5409
5410 return 0;
5411 }
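/*
 * Illustrative worked example, not compiled, assuming the decode above:
 * a NOOFCHAN field of 3 gives 8 channels and a set CHANSIZE bit gives
 * 64-bit channels.
 */
#if 0
static u32 example_vram_width_bits(void)
{
	return 8 * 64;	/* 512-bit bus, as on Hawaii */
}
#endif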
5412
5413 /*
5414 * GART
5415 * VMID 0 is the physical GPU addresses as used by the kernel.
5416 * VMIDs 1-15 are used for userspace clients and are handled
5417 * by the radeon vm/hsa code.
5418 */
5419 /**
5420 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5421 *
5422 * @rdev: radeon_device pointer
5423 *
5424 * Flush the TLB for the VMID 0 page table (CIK).
5425 */
5426 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5427 {
5428 /* flush hdp cache */
5429 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5430
5431 /* bits 0-15 are the VM contexts 0-15 */
5432 WREG32(VM_INVALIDATE_REQUEST, 0x1);
5433 }
5434
5435 /**
5436 * cik_pcie_gart_enable - gart enable
5437 *
5438 * @rdev: radeon_device pointer
5439 *
5440 * This sets up the TLBs, programs the page tables for VMID0,
5441 * sets up the hw for VMIDs 1-15, which are allocated on
5442 * demand, and sets up the global locations for the LDS, GDS,
5443 * and GPUVM for FSA64 clients (CIK).
5444 * Returns 0 for success, errors for failure.
5445 */
5446 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5447 {
5448 int r, i;
5449
5450 if (rdev->gart.robj == NULL) {
5451 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5452 return -EINVAL;
5453 }
5454 r = radeon_gart_table_vram_pin(rdev);
5455 if (r)
5456 return r;
5457 /* Setup TLB control */
5458 WREG32(MC_VM_MX_L1_TLB_CNTL,
5459 (0xA << 7) |
5460 ENABLE_L1_TLB |
5461 ENABLE_L1_FRAGMENT_PROCESSING |
5462 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5463 ENABLE_ADVANCED_DRIVER_MODEL |
5464 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5465 /* Setup L2 cache */
5466 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5467 ENABLE_L2_FRAGMENT_PROCESSING |
5468 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5469 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5470 EFFECTIVE_L2_QUEUE_SIZE(7) |
5471 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5472 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5473 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5474 BANK_SELECT(4) |
5475 L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5476 /* setup context0 */
5477 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5478 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5479 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5480 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5481 (u32)(rdev->dummy_page.addr >> 12));
5482 WREG32(VM_CONTEXT0_CNTL2, 0);
5483 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5484 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5485
5486 WREG32(0x15D4, 0);
5487 WREG32(0x15D8, 0);
5488 WREG32(0x15DC, 0);
5489
5490 /* restore contexts 1-15 */
5491 /* set vm size, must be a multiple of 4 */
5492 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5493 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5494 for (i = 1; i < 16; i++) {
5495 if (i < 8)
5496 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5497 rdev->vm_manager.saved_table_addr[i]);
5498 else
5499 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5500 rdev->vm_manager.saved_table_addr[i]);
5501 }
5502
5503 /* enable contexts 1-15 */
5504 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5505 (u32)(rdev->dummy_page.addr >> 12));
5506 WREG32(VM_CONTEXT1_CNTL2, 4);
5507 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5508 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5509 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5510 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5511 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5512 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5513 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5514 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5515 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5516 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5517 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5518 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5519 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5520 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5521
5522 if (rdev->family == CHIP_KAVERI) {
5523 u32 tmp = RREG32(CHUB_CONTROL);
5524 tmp &= ~BYPASS_VM;
5525 WREG32(CHUB_CONTROL, tmp);
5526 }
5527
5528 /* XXX SH_MEM regs */
5529 /* where to put LDS, scratch, GPUVM in FSA64 space */
5530 mutex_lock(&rdev->srbm_mutex);
5531 for (i = 0; i < 16; i++) {
5532 cik_srbm_select(rdev, 0, 0, 0, i);
5533 /* CP and shaders */
5534 WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5535 WREG32(SH_MEM_APE1_BASE, 1);
5536 WREG32(SH_MEM_APE1_LIMIT, 0);
5537 WREG32(SH_MEM_BASES, 0);
5538 /* SDMA GFX */
5539 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5540 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5541 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5542 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5543 /* XXX SDMA RLC - todo */
5544 }
5545 cik_srbm_select(rdev, 0, 0, 0, 0);
5546 mutex_unlock(&rdev->srbm_mutex);
5547
5548 cik_pcie_gart_tlb_flush(rdev);
5549 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5550 (unsigned)(rdev->mc.gtt_size >> 20),
5551 (unsigned long long)rdev->gart.table_addr);
5552 rdev->gart.ready = true;
5553 return 0;
5554 }
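/*
 * Illustrative sketch, not compiled: contexts 0-7 and 8-15 live in two
 * separate register banks, which is why the loop above (and the disable
 * and flush code later in this file) switches base register at VMID 8.
 * example_vm_pt_base_reg() is a hypothetical helper.
 */
#if 0
static u32 example_vm_pt_base_reg(unsigned int vm_id)
{
	if (vm_id < 8)
		return VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2);
	return VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2);
}
#endif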
5555
5556 /**
5557 * cik_pcie_gart_disable - gart disable
5558 *
5559 * @rdev: radeon_device pointer
5560 *
5561 * This disables all VM page tables (CIK).
5562 */
5563 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5564 {
5565 unsigned i;
5566
5567 for (i = 1; i < 16; ++i) {
5568 uint32_t reg;
5569 if (i < 8)
5570 reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5571 else
5572 reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5573 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5574 }
5575
5576 /* Disable all tables */
5577 WREG32(VM_CONTEXT0_CNTL, 0);
5578 WREG32(VM_CONTEXT1_CNTL, 0);
5579 /* Setup TLB control */
5580 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5581 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5582 /* Setup L2 cache */
5583 WREG32(VM_L2_CNTL,
5584 ENABLE_L2_FRAGMENT_PROCESSING |
5585 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5586 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5587 EFFECTIVE_L2_QUEUE_SIZE(7) |
5588 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5589 WREG32(VM_L2_CNTL2, 0);
5590 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5591 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5592 radeon_gart_table_vram_unpin(rdev);
5593 }
5594
5595 /**
5596 * cik_pcie_gart_fini - vm fini callback
5597 *
5598 * @rdev: radeon_device pointer
5599 *
5600 * Tears down the driver GART/VM setup (CIK).
5601 */
5602 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5603 {
5604 cik_pcie_gart_disable(rdev);
5605 radeon_gart_table_vram_free(rdev);
5606 radeon_gart_fini(rdev);
5607 }
5608
5609 /* vm parser */
5610 /**
5611 * cik_ib_parse - vm ib_parse callback
5612 *
5613 * @rdev: radeon_device pointer
5614 * @ib: indirect buffer pointer
5615 *
5616 * CIK uses hw IB checking so this is a nop (CIK).
5617 */
5618 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5619 {
5620 return 0;
5621 }
5622
5623 /*
5624 * vm
5625 * VMID 0 is the physical GPU addresses as used by the kernel.
5626 * VMIDs 1-15 are used for userspace clients and are handled
5627 * by the radeon vm/hsa code.
5628 */
5629 /**
5630 * cik_vm_init - cik vm init callback
5631 *
5632 * @rdev: radeon_device pointer
5633 *
5634 * Inits cik specific vm parameters (number of VMs, base of vram for
5635 * VMIDs 1-15) (CIK).
5636 * Returns 0 for success.
5637 */
5638 int cik_vm_init(struct radeon_device *rdev)
5639 {
5640 /*
5641 * number of VMs
5642 * VMID 0 is reserved for System
5643 * radeon graphics/compute will use VMIDs 1-15
5644 */
5645 rdev->vm_manager.nvm = 16;
5646 /* base offset of vram pages */
5647 if (rdev->flags & RADEON_IS_IGP) {
5648 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5649 tmp <<= 22;
5650 rdev->vm_manager.vram_base_offset = tmp;
5651 } else
5652 rdev->vm_manager.vram_base_offset = 0;
5653
5654 return 0;
5655 }
5656
5657 /**
5658 * cik_vm_fini - cik vm fini callback
5659 *
5660 * @rdev: radeon_device pointer
5661 *
5662 * Tear down any asic specific VM setup (CIK).
5663 */
5664 void cik_vm_fini(struct radeon_device *rdev)
5665 {
5666 }
5667
5668 /**
5669 * cik_vm_decode_fault - print human readable fault info
5670 *
5671 * @rdev: radeon_device pointer
5672 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5673 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5674 *
5675 * Print human readable fault information (CIK).
5676 */
5677 static void cik_vm_decode_fault(struct radeon_device *rdev,
5678 u32 status, u32 addr, u32 mc_client)
5679 {
5680 u32 mc_id;
5681 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5682 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5683 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5684 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5685
5686 if (rdev->family == CHIP_HAWAII)
5687 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5688 else
5689 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5690
5691 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5692 protections, vmid, addr,
5693 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5694 block, mc_client, mc_id);
5695 }
5696
5697 /**
5698 * cik_vm_flush - cik vm flush using the CP
5699 *
5700 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID of the page table to flush
 * @pd_addr: physical base address of the page directory
5701 *
5702 * Update the page table base and flush the VM TLB
5703 * using the CP (CIK).
5704 */
5705 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5706 unsigned vm_id, uint64_t pd_addr)
5707 {
5708 int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5709
5710 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5711 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5712 WRITE_DATA_DST_SEL(0)));
5713 if (vm_id < 8) {
5714 radeon_ring_write(ring,
5715 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5716 } else {
5717 radeon_ring_write(ring,
5718 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5719 }
5720 radeon_ring_write(ring, 0);
5721 radeon_ring_write(ring, pd_addr >> 12);
5722
5723 /* update SH_MEM_* regs */
5724 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5725 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5726 WRITE_DATA_DST_SEL(0)));
5727 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5728 radeon_ring_write(ring, 0);
5729 radeon_ring_write(ring, VMID(vm_id));
5730
5731 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5732 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5733 WRITE_DATA_DST_SEL(0)));
5734 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5735 radeon_ring_write(ring, 0);
5736
5737 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5738 radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5739 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5740 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5741
5742 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5743 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5744 WRITE_DATA_DST_SEL(0)));
5745 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5746 radeon_ring_write(ring, 0);
5747 radeon_ring_write(ring, VMID(0));
5748
5749 /* HDP flush */
5750 cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5751
5752 /* bits 0-15 are the VM contexts 0-15 */
5753 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5754 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5755 WRITE_DATA_DST_SEL(0)));
5756 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5757 radeon_ring_write(ring, 0);
5758 radeon_ring_write(ring, 1 << vm_id);
5759
5760 /* wait for the invalidate to complete */
5761 radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5762 radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5763 WAIT_REG_MEM_FUNCTION(0) | /* always */
5764 WAIT_REG_MEM_ENGINE(0))); /* me */
5765 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5766 radeon_ring_write(ring, 0);
5767 radeon_ring_write(ring, 0); /* ref */
5768 radeon_ring_write(ring, 0); /* mask */
5769 radeon_ring_write(ring, 0x20); /* poll interval */
5770
5771 /* compute doesn't have PFP */
5772 if (usepfp) {
5773 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5774 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5775 radeon_ring_write(ring, 0x0);
5776 }
5777 }
5778
5779 /*
5780 * RLC
5781 * The RLC is a multi-purpose microengine that handles a
5782 * variety of functions, the most important of which is
5783 * the interrupt controller.
5784 */
5785 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5786 bool enable)
5787 {
5788 u32 tmp = RREG32(CP_INT_CNTL_RING0);
5789
5790 if (enable)
5791 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5792 else
5793 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5794 WREG32(CP_INT_CNTL_RING0, tmp);
5795 }
5796
5797 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5798 {
5799 u32 tmp;
5800
5801 tmp = RREG32(RLC_LB_CNTL);
5802 if (enable)
5803 tmp |= LOAD_BALANCE_ENABLE;
5804 else
5805 tmp &= ~LOAD_BALANCE_ENABLE;
5806 WREG32(RLC_LB_CNTL, tmp);
5807 }
5808
5809 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5810 {
5811 u32 i, j, k;
5812 u32 mask;
5813
5814 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5815 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5816 cik_select_se_sh(rdev, i, j);
5817 for (k = 0; k < rdev->usec_timeout; k++) {
5818 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5819 break;
5820 udelay(1);
5821 }
5822 }
5823 }
5824 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5825
5826 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5827 for (k = 0; k < rdev->usec_timeout; k++) {
5828 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5829 break;
5830 udelay(1);
5831 }
5832 }
5833
5834 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5835 {
5836 u32 tmp;
5837
5838 tmp = RREG32(RLC_CNTL);
5839 if (tmp != rlc)
5840 WREG32(RLC_CNTL, rlc);
5841 }
5842
5843 static u32 cik_halt_rlc(struct radeon_device *rdev)
5844 {
5845 u32 data, orig;
5846
5847 orig = data = RREG32(RLC_CNTL);
5848
5849 if (data & RLC_ENABLE) {
5850 u32 i;
5851
5852 data &= ~RLC_ENABLE;
5853 WREG32(RLC_CNTL, data);
5854
5855 for (i = 0; i < rdev->usec_timeout; i++) {
5856 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5857 break;
5858 udelay(1);
5859 }
5860
5861 cik_wait_for_rlc_serdes(rdev);
5862 }
5863
5864 return orig;
5865 }
5866
5867 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5868 {
5869 u32 tmp, i, mask;
5870
5871 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5872 WREG32(RLC_GPR_REG2, tmp);
5873
5874 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5875 for (i = 0; i < rdev->usec_timeout; i++) {
5876 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5877 break;
5878 udelay(1);
5879 }
5880
5881 for (i = 0; i < rdev->usec_timeout; i++) {
5882 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5883 break;
5884 udelay(1);
5885 }
5886 }
5887
5888 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5889 {
5890 u32 tmp;
5891
5892 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5893 WREG32(RLC_GPR_REG2, tmp);
5894 }
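/*
 * Illustrative sketch, not compiled: the enter/exit pair above is meant
 * to bracket updates to clock- and power-gating sensitive state, much
 * as the CG code below brackets SERDES writes with
 * cik_halt_rlc()/cik_update_rlc().
 */
#if 0
static void example_safe_mode_bracket(struct radeon_device *rdev)
{
	cik_enter_rlc_safe_mode(rdev);
	/* ... reprogram gating-sensitive registers here ... */
	cik_exit_rlc_safe_mode(rdev);
}
#endif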
5895
5896 /**
5897 * cik_rlc_stop - stop the RLC ME
5898 *
5899 * @rdev: radeon_device pointer
5900 *
5901 * Halt the RLC ME (MicroEngine) (CIK).
5902 */
5903 static void cik_rlc_stop(struct radeon_device *rdev)
5904 {
5905 WREG32(RLC_CNTL, 0);
5906
5907 cik_enable_gui_idle_interrupt(rdev, false);
5908
5909 cik_wait_for_rlc_serdes(rdev);
5910 }
5911
5912 /**
5913 * cik_rlc_start - start the RLC ME
5914 *
5915 * @rdev: radeon_device pointer
5916 *
5917 * Unhalt the RLC ME (MicroEngine) (CIK).
5918 */
5919 static void cik_rlc_start(struct radeon_device *rdev)
5920 {
5921 WREG32(RLC_CNTL, RLC_ENABLE);
5922
5923 cik_enable_gui_idle_interrupt(rdev, true);
5924
5925 udelay(50);
5926 }
5927
5928 /**
5929 * cik_rlc_resume - setup the RLC hw
5930 *
5931 * @rdev: radeon_device pointer
5932 *
5933 * Initialize the RLC registers, load the ucode,
5934 * and start the RLC (CIK).
5935 * Returns 0 for success, -EINVAL if the ucode is not available.
5936 */
5937 static int cik_rlc_resume(struct radeon_device *rdev)
5938 {
5939 u32 i, size, tmp;
5940
5941 if (!rdev->rlc_fw)
5942 return -EINVAL;
5943
5944 cik_rlc_stop(rdev);
5945
5946 /* disable CG */
5947 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5948 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5949
5950 si_rlc_reset(rdev);
5951
5952 cik_init_pg(rdev);
5953
5954 cik_init_cg(rdev);
5955
5956 WREG32(RLC_LB_CNTR_INIT, 0);
5957 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5958
5959 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5960 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5961 WREG32(RLC_LB_PARAMS, 0x00600408);
5962 WREG32(RLC_LB_CNTL, 0x80000004);
5963
5964 WREG32(RLC_MC_CNTL, 0);
5965 WREG32(RLC_UCODE_CNTL, 0);
5966
5967 if (rdev->new_fw) {
5968 const struct rlc_firmware_header_v1_0 *hdr =
5969 (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5970 const __le32 *fw_data = (const __le32 *)
5971 (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5972
5973 radeon_ucode_print_rlc_hdr(&hdr->header);
5974
5975 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5976 WREG32(RLC_GPM_UCODE_ADDR, 0);
5977 for (i = 0; i < size; i++)
5978 WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5979 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5980 } else {
5981 const __be32 *fw_data;
5982
5983 switch (rdev->family) {
5984 case CHIP_BONAIRE:
5985 case CHIP_HAWAII:
5986 default:
5987 size = BONAIRE_RLC_UCODE_SIZE;
5988 break;
5989 case CHIP_KAVERI:
5990 size = KV_RLC_UCODE_SIZE;
5991 break;
5992 case CHIP_KABINI:
5993 size = KB_RLC_UCODE_SIZE;
5994 break;
5995 case CHIP_MULLINS:
5996 size = ML_RLC_UCODE_SIZE;
5997 break;
5998 }
5999
6000 fw_data = (const __be32 *)rdev->rlc_fw->data;
6001 WREG32(RLC_GPM_UCODE_ADDR, 0);
6002 for (i = 0; i < size; i++)
6003 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6004 WREG32(RLC_GPM_UCODE_ADDR, 0);
6005 }
6006
6007 /* XXX - find out what chips support lbpw */
6008 cik_enable_lbpw(rdev, false);
6009
6010 if (rdev->family == CHIP_BONAIRE)
6011 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6012
6013 cik_rlc_start(rdev);
6014
6015 return 0;
6016 }
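/*
 * Illustrative sketch, not compiled: both firmware layouts above are
 * streamed the same way -- rewind the auto-incrementing address port,
 * then push one dword at a time; only the source endianness differs.
 * example_upload_rlc_words() is a hypothetical helper.
 */
#if 0
static void example_upload_rlc_words(struct radeon_device *rdev,
				     const __le32 *fw_data, u32 nwords)
{
	u32 i;

	WREG32(RLC_GPM_UCODE_ADDR, 0);	/* rewind the write port */
	for (i = 0; i < nwords; i++)
		WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
}
#endif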
6017
6018 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6019 {
6020 u32 data, orig, tmp, tmp2;
6021
6022 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6023
6024 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6025 cik_enable_gui_idle_interrupt(rdev, true);
6026
6027 tmp = cik_halt_rlc(rdev);
6028
6029 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6030 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6031 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6032 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6033 WREG32(RLC_SERDES_WR_CTRL, tmp2);
6034
6035 cik_update_rlc(rdev, tmp);
6036
6037 data |= CGCG_EN | CGLS_EN;
6038 } else {
6039 cik_enable_gui_idle_interrupt(rdev, false);
6040
6041 RREG32(CB_CGTT_SCLK_CTRL);
6042 RREG32(CB_CGTT_SCLK_CTRL);
6043 RREG32(CB_CGTT_SCLK_CTRL);
6044 RREG32(CB_CGTT_SCLK_CTRL);
6045
6046 data &= ~(CGCG_EN | CGLS_EN);
6047 }
6048
6049 if (orig != data)
6050 WREG32(RLC_CGCG_CGLS_CTRL, data);
6051
6052 }
6053
6054 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6055 {
6056 u32 data, orig, tmp = 0;
6057
6058 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6059 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6060 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6061 orig = data = RREG32(CP_MEM_SLP_CNTL);
6062 data |= CP_MEM_LS_EN;
6063 if (orig != data)
6064 WREG32(CP_MEM_SLP_CNTL, data);
6065 }
6066 }
6067
6068 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6069 data |= 0x00000001;
6070 data &= 0xfffffffd;
6071 if (orig != data)
6072 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6073
6074 tmp = cik_halt_rlc(rdev);
6075
6076 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6077 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6078 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6079 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6080 WREG32(RLC_SERDES_WR_CTRL, data);
6081
6082 cik_update_rlc(rdev, tmp);
6083
6084 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6085 orig = data = RREG32(CGTS_SM_CTRL_REG);
6086 data &= ~SM_MODE_MASK;
6087 data |= SM_MODE(0x2);
6088 data |= SM_MODE_ENABLE;
6089 data &= ~CGTS_OVERRIDE;
6090 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6091 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6092 data &= ~CGTS_LS_OVERRIDE;
6093 data &= ~ON_MONITOR_ADD_MASK;
6094 data |= ON_MONITOR_ADD_EN;
6095 data |= ON_MONITOR_ADD(0x96);
6096 if (orig != data)
6097 WREG32(CGTS_SM_CTRL_REG, data);
6098 }
6099 } else {
6100 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6101 data |= 0x00000003;
6102 if (orig != data)
6103 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6104
6105 data = RREG32(RLC_MEM_SLP_CNTL);
6106 if (data & RLC_MEM_LS_EN) {
6107 data &= ~RLC_MEM_LS_EN;
6108 WREG32(RLC_MEM_SLP_CNTL, data);
6109 }
6110
6111 data = RREG32(CP_MEM_SLP_CNTL);
6112 if (data & CP_MEM_LS_EN) {
6113 data &= ~CP_MEM_LS_EN;
6114 WREG32(CP_MEM_SLP_CNTL, data);
6115 }
6116
6117 orig = data = RREG32(CGTS_SM_CTRL_REG);
6118 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6119 if (orig != data)
6120 WREG32(CGTS_SM_CTRL_REG, data);
6121
6122 tmp = cik_halt_rlc(rdev);
6123
6124 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6125 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6126 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6127 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6128 WREG32(RLC_SERDES_WR_CTRL, data);
6129
6130 cik_update_rlc(rdev, tmp);
6131 }
6132 }
6133
6134 static const u32 mc_cg_registers[] =
6135 {
6136 MC_HUB_MISC_HUB_CG,
6137 MC_HUB_MISC_SIP_CG,
6138 MC_HUB_MISC_VM_CG,
6139 MC_XPB_CLK_GAT,
6140 ATC_MISC_CG,
6141 MC_CITF_MISC_WR_CG,
6142 MC_CITF_MISC_RD_CG,
6143 MC_CITF_MISC_VM_CG,
6144 VM_L2_CG,
6145 };
6146
6147 static void cik_enable_mc_ls(struct radeon_device *rdev,
6148 bool enable)
6149 {
6150 int i;
6151 u32 orig, data;
6152
6153 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6154 orig = data = RREG32(mc_cg_registers[i]);
6155 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6156 data |= MC_LS_ENABLE;
6157 else
6158 data &= ~MC_LS_ENABLE;
6159 if (data != orig)
6160 WREG32(mc_cg_registers[i], data);
6161 }
6162 }
6163
6164 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6165 bool enable)
6166 {
6167 int i;
6168 u32 orig, data;
6169
6170 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6171 orig = data = RREG32(mc_cg_registers[i]);
6172 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6173 data |= MC_CG_ENABLE;
6174 else
6175 data &= ~MC_CG_ENABLE;
6176 if (data != orig)
6177 WREG32(mc_cg_registers[i], data);
6178 }
6179 }
6180
6181 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6182 bool enable)
6183 {
6184 u32 orig, data;
6185
6186 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6187 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6188 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6189 } else {
6190 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6191 data |= 0xff000000;
6192 if (data != orig)
6193 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6194
6195 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6196 data |= 0xff000000;
6197 if (data != orig)
6198 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6199 }
6200 }
6201
6202 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6203 bool enable)
6204 {
6205 u32 orig, data;
6206
6207 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6208 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6209 data |= 0x100;
6210 if (orig != data)
6211 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6212
6213 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6214 data |= 0x100;
6215 if (orig != data)
6216 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6217 } else {
6218 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6219 data &= ~0x100;
6220 if (orig != data)
6221 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6222
6223 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6224 data &= ~0x100;
6225 if (orig != data)
6226 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6227 }
6228 }
6229
6230 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6231 bool enable)
6232 {
6233 u32 orig, data;
6234
6235 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6236 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6237 data = 0xfff;
6238 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6239
6240 orig = data = RREG32(UVD_CGC_CTRL);
6241 data |= DCM;
6242 if (orig != data)
6243 WREG32(UVD_CGC_CTRL, data);
6244 } else {
6245 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6246 data &= ~0xfff;
6247 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6248
6249 orig = data = RREG32(UVD_CGC_CTRL);
6250 data &= ~DCM;
6251 if (orig != data)
6252 WREG32(UVD_CGC_CTRL, data);
6253 }
6254 }
6255
6256 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6257 bool enable)
6258 {
6259 u32 orig, data;
6260
6261 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6262
6263 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6264 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6265 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6266 else
6267 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6268 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6269
6270 if (orig != data)
6271 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6272 }
6273
6274 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6275 bool enable)
6276 {
6277 u32 orig, data;
6278
6279 orig = data = RREG32(HDP_HOST_PATH_CNTL);
6280
6281 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6282 data &= ~CLOCK_GATING_DIS;
6283 else
6284 data |= CLOCK_GATING_DIS;
6285
6286 if (orig != data)
6287 WREG32(HDP_HOST_PATH_CNTL, data);
6288 }
6289
6290 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6291 bool enable)
6292 {
6293 u32 orig, data;
6294
6295 orig = data = RREG32(HDP_MEM_POWER_LS);
6296
6297 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6298 data |= HDP_LS_ENABLE;
6299 else
6300 data &= ~HDP_LS_ENABLE;
6301
6302 if (orig != data)
6303 WREG32(HDP_MEM_POWER_LS, data);
6304 }
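/*
 * Illustrative sketch, not compiled: the gating toggles above all share
 * one read-modify-write idiom, skipping the register write when nothing
 * changed. example_rmw_bit() is a hypothetical distillation.
 */
#if 0
static void example_rmw_bit(struct radeon_device *rdev, u32 reg, u32 bit,
			    bool enable)
{
	u32 orig, data;

	orig = data = RREG32(reg);
	if (enable)
		data |= bit;
	else
		data &= ~bit;
	if (orig != data)	/* avoid a redundant register write */
		WREG32(reg, data);
}
#endif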
6305
6306 void cik_update_cg(struct radeon_device *rdev,
6307 u32 block, bool enable)
6308 {
6309
6310 if (block & RADEON_CG_BLOCK_GFX) {
6311 cik_enable_gui_idle_interrupt(rdev, false);
6312 /* order matters! */
6313 if (enable) {
6314 cik_enable_mgcg(rdev, true);
6315 cik_enable_cgcg(rdev, true);
6316 } else {
6317 cik_enable_cgcg(rdev, false);
6318 cik_enable_mgcg(rdev, false);
6319 }
6320 cik_enable_gui_idle_interrupt(rdev, true);
6321 }
6322
6323 if (block & RADEON_CG_BLOCK_MC) {
6324 if (!(rdev->flags & RADEON_IS_IGP)) {
6325 cik_enable_mc_mgcg(rdev, enable);
6326 cik_enable_mc_ls(rdev, enable);
6327 }
6328 }
6329
6330 if (block & RADEON_CG_BLOCK_SDMA) {
6331 cik_enable_sdma_mgcg(rdev, enable);
6332 cik_enable_sdma_mgls(rdev, enable);
6333 }
6334
6335 if (block & RADEON_CG_BLOCK_BIF) {
6336 cik_enable_bif_mgls(rdev, enable);
6337 }
6338
6339 if (block & RADEON_CG_BLOCK_UVD) {
6340 if (rdev->has_uvd)
6341 cik_enable_uvd_mgcg(rdev, enable);
6342 }
6343
6344 if (block & RADEON_CG_BLOCK_HDP) {
6345 cik_enable_hdp_mgcg(rdev, enable);
6346 cik_enable_hdp_ls(rdev, enable);
6347 }
6348
6349 if (block & RADEON_CG_BLOCK_VCE) {
6350 vce_v2_0_enable_mgcg(rdev, enable);
6351 }
6352 }
6353
6354 static void cik_init_cg(struct radeon_device *rdev)
6355 {
6356
6357 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6358
6359 if (rdev->has_uvd)
6360 si_init_uvd_internal_cg(rdev);
6361
6362 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6363 RADEON_CG_BLOCK_SDMA |
6364 RADEON_CG_BLOCK_BIF |
6365 RADEON_CG_BLOCK_UVD |
6366 RADEON_CG_BLOCK_HDP), true);
6367 }
6368
6369 static void cik_fini_cg(struct radeon_device *rdev)
6370 {
6371 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6372 RADEON_CG_BLOCK_SDMA |
6373 RADEON_CG_BLOCK_BIF |
6374 RADEON_CG_BLOCK_UVD |
6375 RADEON_CG_BLOCK_HDP), false);
6376
6377 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6378 }
6379
6380 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6381 bool enable)
6382 {
6383 u32 data, orig;
6384
6385 orig = data = RREG32(RLC_PG_CNTL);
6386 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6387 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6388 else
6389 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6390 if (orig != data)
6391 WREG32(RLC_PG_CNTL, data);
6392 }
6393
6394 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6395 bool enable)
6396 {
6397 u32 data, orig;
6398
6399 orig = data = RREG32(RLC_PG_CNTL);
6400 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6401 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6402 else
6403 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6404 if (orig != data)
6405 WREG32(RLC_PG_CNTL, data);
6406 }
6407
6408 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6409 {
6410 u32 data, orig;
6411
6412 orig = data = RREG32(RLC_PG_CNTL);
6413 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6414 data &= ~DISABLE_CP_PG;
6415 else
6416 data |= DISABLE_CP_PG;
6417 if (orig != data)
6418 WREG32(RLC_PG_CNTL, data);
6419 }
6420
6421 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6422 {
6423 u32 data, orig;
6424
6425 orig = data = RREG32(RLC_PG_CNTL);
6426 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6427 data &= ~DISABLE_GDS_PG;
6428 else
6429 data |= DISABLE_GDS_PG;
6430 if (orig != data)
6431 WREG32(RLC_PG_CNTL, data);
6432 }
6433
6434 #define CP_ME_TABLE_SIZE 96
6435 #define CP_ME_TABLE_OFFSET 2048
6436 #define CP_MEC_TABLE_OFFSET 4096
6437
6438 void cik_init_cp_pg_table(struct radeon_device *rdev)
6439 {
6440 volatile u32 *dst_ptr;
6441 int me, i, max_me = 4;
6442 u32 bo_offset = 0;
6443 u32 table_offset, table_size;
6444
6445 if (rdev->family == CHIP_KAVERI)
6446 max_me = 5;
6447
6448 if (rdev->rlc.cp_table_ptr == NULL)
6449 return;
6450
6451 /* write the cp table buffer */
6452 dst_ptr = rdev->rlc.cp_table_ptr;
6453 for (me = 0; me < max_me; me++) {
6454 if (rdev->new_fw) {
6455 const __le32 *fw_data;
6456 const struct gfx_firmware_header_v1_0 *hdr;
6457
6458 if (me == 0) {
6459 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6460 fw_data = (const __le32 *)
6461 (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6462 table_offset = le32_to_cpu(hdr->jt_offset);
6463 table_size = le32_to_cpu(hdr->jt_size);
6464 } else if (me == 1) {
6465 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6466 fw_data = (const __le32 *)
6467 (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6468 table_offset = le32_to_cpu(hdr->jt_offset);
6469 table_size = le32_to_cpu(hdr->jt_size);
6470 } else if (me == 2) {
6471 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6472 fw_data = (const __le32 *)
6473 (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6474 table_offset = le32_to_cpu(hdr->jt_offset);
6475 table_size = le32_to_cpu(hdr->jt_size);
6476 } else if (me == 3) {
6477 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6478 fw_data = (const __le32 *)
6479 (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6480 table_offset = le32_to_cpu(hdr->jt_offset);
6481 table_size = le32_to_cpu(hdr->jt_size);
6482 } else {
6483 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6484 fw_data = (const __le32 *)
6485 (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6486 table_offset = le32_to_cpu(hdr->jt_offset);
6487 table_size = le32_to_cpu(hdr->jt_size);
6488 }
6489
6490 for (i = 0; i < table_size; i ++) {
6491 dst_ptr[bo_offset + i] =
6492 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6493 }
6494 bo_offset += table_size;
6495 } else {
6496 const __be32 *fw_data;
6497 table_size = CP_ME_TABLE_SIZE;
6498
6499 if (me == 0) {
6500 fw_data = (const __be32 *)rdev->ce_fw->data;
6501 table_offset = CP_ME_TABLE_OFFSET;
6502 } else if (me == 1) {
6503 fw_data = (const __be32 *)rdev->pfp_fw->data;
6504 table_offset = CP_ME_TABLE_OFFSET;
6505 } else if (me == 2) {
6506 fw_data = (const __be32 *)rdev->me_fw->data;
6507 table_offset = CP_ME_TABLE_OFFSET;
6508 } else {
6509 fw_data = (const __be32 *)rdev->mec_fw->data;
6510 table_offset = CP_MEC_TABLE_OFFSET;
6511 }
6512
6513 for (i = 0; i < table_size; i ++) {
6514 dst_ptr[bo_offset + i] =
6515 cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6516 }
6517 bo_offset += table_size;
6518 }
6519 }
6520 }
6521
6522 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6523 bool enable)
6524 {
6525 u32 data, orig;
6526
6527 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6528 orig = data = RREG32(RLC_PG_CNTL);
6529 data |= GFX_PG_ENABLE;
6530 if (orig != data)
6531 WREG32(RLC_PG_CNTL, data);
6532
6533 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6534 data |= AUTO_PG_EN;
6535 if (orig != data)
6536 WREG32(RLC_AUTO_PG_CTRL, data);
6537 } else {
6538 orig = data = RREG32(RLC_PG_CNTL);
6539 data &= ~GFX_PG_ENABLE;
6540 if (orig != data)
6541 WREG32(RLC_PG_CNTL, data);
6542
6543 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6544 data &= ~AUTO_PG_EN;
6545 if (orig != data)
6546 WREG32(RLC_AUTO_PG_CTRL, data);
6547
6548 data = RREG32(DB_RENDER_CONTROL);
6549 }
6550 }
6551
6552 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6553 {
6554 u32 mask = 0, tmp, tmp1;
6555 int i;
6556
6557 cik_select_se_sh(rdev, se, sh);
6558 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6559 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6560 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6561
6562 tmp &= 0xffff0000;
6563
6564 tmp |= tmp1;
6565 tmp >>= 16;
6566
6567 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6568 mask <<= 1;
6569 mask |= 1;
6570 }
6571
6572 return (~tmp) & mask;
6573 }
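/*
 * Illustrative worked example, not compiled: with max_cu_per_sh = 8 the
 * loop above builds mask = 0xff. If the fused-off bits (tmp, after the
 * shift down) were 0x03, the active-CU bitmap would be
 * (~0x03) & 0xff = 0xfc, i.e. CUs 2-7 active.
 */
#if 0
static u32 example_active_cus(void)
{
	u32 tmp = 0x03, mask = 0xff;	/* 2 fused-off CUs, 8 CUs per SH */

	return (~tmp) & mask;		/* 0xfc */
}
#endif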
6574
6575 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6576 {
6577 u32 i, j, k, active_cu_number = 0;
6578 u32 mask, counter, cu_bitmap;
6579 u32 tmp = 0;
6580
6581 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6582 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6583 mask = 1;
6584 cu_bitmap = 0;
6585 counter = 0;
6586 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6587 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6588 if (counter < 2)
6589 cu_bitmap |= mask;
6590 counter ++;
6591 }
6592 mask <<= 1;
6593 }
6594
6595 active_cu_number += counter;
6596 tmp |= (cu_bitmap << (i * 16 + j * 8));
6597 }
6598 }
6599
6600 WREG32(RLC_PG_AO_CU_MASK, tmp);
6601
6602 tmp = RREG32(RLC_MAX_PG_CU);
6603 tmp &= ~MAX_PU_CU_MASK;
6604 tmp |= MAX_PU_CU(active_cu_number);
6605 WREG32(RLC_MAX_PG_CU, tmp);
6606 }
6607
6608 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6609 bool enable)
6610 {
6611 u32 data, orig;
6612
6613 orig = data = RREG32(RLC_PG_CNTL);
6614 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6615 data |= STATIC_PER_CU_PG_ENABLE;
6616 else
6617 data &= ~STATIC_PER_CU_PG_ENABLE;
6618 if (orig != data)
6619 WREG32(RLC_PG_CNTL, data);
6620 }
6621
6622 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6623 bool enable)
6624 {
6625 u32 data, orig;
6626
6627 orig = data = RREG32(RLC_PG_CNTL);
6628 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6629 data |= DYN_PER_CU_PG_ENABLE;
6630 else
6631 data &= ~DYN_PER_CU_PG_ENABLE;
6632 if (orig != data)
6633 WREG32(RLC_PG_CNTL, data);
6634 }
6635
6636 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6637 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
6638
6639 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6640 {
6641 u32 data, orig;
6642 u32 i;
6643
6644 if (rdev->rlc.cs_data) {
6645 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6646 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6647 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6648 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6649 } else {
6650 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6651 for (i = 0; i < 3; i++)
6652 WREG32(RLC_GPM_SCRATCH_DATA, 0);
6653 }
6654 if (rdev->rlc.reg_list) {
6655 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6656 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6657 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6658 }
6659
6660 orig = data = RREG32(RLC_PG_CNTL);
6661 data |= GFX_PG_SRC;
6662 if (orig != data)
6663 WREG32(RLC_PG_CNTL, data);
6664
6665 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6666 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6667
6668 data = RREG32(CP_RB_WPTR_POLL_CNTL);
6669 data &= ~IDLE_POLL_COUNT_MASK;
6670 data |= IDLE_POLL_COUNT(0x60);
6671 WREG32(CP_RB_WPTR_POLL_CNTL, data);
6672
6673 data = 0x10101010;
6674 WREG32(RLC_PG_DELAY, data);
6675
6676 data = RREG32(RLC_PG_DELAY_2);
6677 data &= ~0xff;
6678 data |= 0x3;
6679 WREG32(RLC_PG_DELAY_2, data);
6680
6681 data = RREG32(RLC_AUTO_PG_CTRL);
6682 data &= ~GRBM_REG_SGIT_MASK;
6683 data |= GRBM_REG_SGIT(0x700);
6684 WREG32(RLC_AUTO_PG_CTRL, data);
6685
6686 }
6687
6688 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6689 {
6690 cik_enable_gfx_cgpg(rdev, enable);
6691 cik_enable_gfx_static_mgpg(rdev, enable);
6692 cik_enable_gfx_dynamic_mgpg(rdev, enable);
6693 }
6694
6695 u32 cik_get_csb_size(struct radeon_device *rdev)
6696 {
6697 u32 count = 0;
6698 const struct cs_section_def *sect = NULL;
6699 const struct cs_extent_def *ext = NULL;
6700
6701 if (rdev->rlc.cs_data == NULL)
6702 return 0;
6703
6704 /* begin clear state */
6705 count += 2;
6706 /* context control state */
6707 count += 3;
6708
6709 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6710 for (ext = sect->section; ext->extent != NULL; ++ext) {
6711 if (sect->id == SECT_CONTEXT)
6712 count += 2 + ext->reg_count;
6713 else
6714 return 0;
6715 }
6716 }
6717 /* pa_sc_raster_config/pa_sc_raster_config1 */
6718 count += 4;
6719 /* end clear state */
6720 count += 2;
6721 /* clear state */
6722 count += 2;
6723
6724 return count;
6725 }
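/*
 * Illustrative sketch, not compiled: cik_get_csb_size() and
 * cik_get_csb_buffer() below walk the same cs_data tables, so the dword
 * count returned here is exactly what the buffer writer emits.
 */
#if 0
static void example_build_csb(struct radeon_device *rdev, volatile u32 *buf)
{
	u32 dws = cik_get_csb_size(rdev);	/* 0 when rlc.cs_data is NULL */

	if (dws)
		cik_get_csb_buffer(rdev, buf);	/* fills exactly dws dwords */
}
#endif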
6726
6727 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6728 {
6729 u32 count = 0, i;
6730 const struct cs_section_def *sect = NULL;
6731 const struct cs_extent_def *ext = NULL;
6732
6733 if (rdev->rlc.cs_data == NULL)
6734 return;
6735 if (buffer == NULL)
6736 return;
6737
6738 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6739 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6740
6741 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6742 buffer[count++] = cpu_to_le32(0x80000000);
6743 buffer[count++] = cpu_to_le32(0x80000000);
6744
6745 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6746 for (ext = sect->section; ext->extent != NULL; ++ext) {
6747 if (sect->id == SECT_CONTEXT) {
6748 buffer[count++] =
6749 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6750 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6751 for (i = 0; i < ext->reg_count; i++)
6752 buffer[count++] = cpu_to_le32(ext->extent[i]);
6753 } else {
6754 return;
6755 }
6756 }
6757 }
6758
6759 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6760 buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6761 switch (rdev->family) {
6762 case CHIP_BONAIRE:
6763 buffer[count++] = cpu_to_le32(0x16000012);
6764 buffer[count++] = cpu_to_le32(0x00000000);
6765 break;
6766 case CHIP_KAVERI:
6767 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6768 buffer[count++] = cpu_to_le32(0x00000000);
6769 break;
6770 case CHIP_KABINI:
6771 case CHIP_MULLINS:
6772 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6773 buffer[count++] = cpu_to_le32(0x00000000);
6774 break;
6775 case CHIP_HAWAII:
6776 buffer[count++] = cpu_to_le32(0x3a00161a);
6777 buffer[count++] = cpu_to_le32(0x0000002e);
6778 break;
6779 default:
6780 buffer[count++] = cpu_to_le32(0x00000000);
6781 buffer[count++] = cpu_to_le32(0x00000000);
6782 break;
6783 }
6784
6785 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6786 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6787
6788 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6789 buffer[count++] = cpu_to_le32(0);
6790 }
6791
6792 static void cik_init_pg(struct radeon_device *rdev)
6793 {
6794 if (rdev->pg_flags) {
6795 cik_enable_sck_slowdown_on_pu(rdev, true);
6796 cik_enable_sck_slowdown_on_pd(rdev, true);
6797 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6798 cik_init_gfx_cgpg(rdev);
6799 cik_enable_cp_pg(rdev, true);
6800 cik_enable_gds_pg(rdev, true);
6801 }
6802 cik_init_ao_cu_mask(rdev);
6803 cik_update_gfx_pg(rdev, true);
6804 }
6805 }
6806
6807 static void cik_fini_pg(struct radeon_device *rdev)
6808 {
6809 if (rdev->pg_flags) {
6810 cik_update_gfx_pg(rdev, false);
6811 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6812 cik_enable_cp_pg(rdev, false);
6813 cik_enable_gds_pg(rdev, false);
6814 }
6815 }
6816 }
6817
6818 /*
6819 * Interrupts
6820 * Starting with r6xx, interrupts are handled via a ring buffer.
6821 * Ring buffers are areas of GPU accessible memory that the GPU
6822 * writes interrupt vectors into and the host reads vectors out of.
6823 * There is a rptr (read pointer) that determines where the
6824 * host is currently reading, and a wptr (write pointer)
6825 * which determines where the GPU has written. When the
6826 * pointers are equal, the ring is idle. When the GPU
6827 * writes vectors to the ring buffer, it increments the
6828 * wptr. When there is an interrupt, the host then starts
6829 * fetching commands and processing them until the pointers are
6830 * equal again at which point it updates the rptr.
6831 */
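/*
 * A minimal sketch of the consumer model described above, assuming
 * the CIK layout of one 16-byte vector per ring entry and byte-based
 * pointers (process_vector() and ptr_mask are placeholders; the real
 * loop lives in cik_irq_process() below):
 *
 *	while (rptr != wptr) {
 *		process_vector(&ih_ring[rptr / 4]);
 *		rptr = (rptr + 16) & ptr_mask;
 *	}
 *	WREG32(IH_RB_RPTR, rptr);	tell the GPU we caught up
 */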

/**
 * cik_enable_interrupts - Enable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Enable the interrupt ring buffer (CIK).
 */
static void cik_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;
}

/**
 * cik_disable_interrupts - Disable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Disable the interrupt ring buffer (CIK).
 */
static void cik_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}

/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: clear the enables but keep the user-set polarity bit */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);
}

/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to dummy page address */
	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
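/*
 * A worked example of the IH_RB_CNTL sizing above, assuming (for
 * illustration) a 64 KB IH ring: the ring holds 65536 / 4 = 16384
 * dwords, so
 *
 *	rb_bufsz = order_base_2(16384) = 14
 *	ih_rb_cntl |= 14 << 1;		log2 size in the RB size field
 *
 * i.e. the hardware is told the ring size as a power of two, not in
 * bytes.
 */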

/**
 * cik_irq_set - enable/disable interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Enable interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).
 * Returns 0 for success, errors for failure.
 */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;

	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
		DRM_DEBUG("cik_irq_set: sw int cp1\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else if (ring->me == 2) {
			switch (ring->pipe) {
			case 0:
				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
		}
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
		DRM_DEBUG("cik_irq_set: sw int cp2\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else if (ring->me == 2) {
			switch (ring->pipe) {
			case 0:
				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
		}
	}

	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("cik_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("cik_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("cik_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("cik_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("cik_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("cik_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("cik_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("cik_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("cik_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("cik_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("cik_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("cik_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
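/*
 * Usage sketch: callers do not program these registers directly.
 * The radeon_irq_kms layer sets the rdev->irq.* flags and then
 * invokes this function through the asic table, e.g. (simplified):
 *
 *	rdev->irq.crtc_vblank_int[0] = true;
 *	cik_irq_set(rdev);		latch the flags into hardware
 */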

/**
 * cik_irq_ack - ack interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK). Certain interrupt sources are sw
 * generated and do not require an explicit ack.
 */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC0_REGISTER_OFFSET);
	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
		EVERGREEN_CRTC1_REGISTER_OFFSET);
	if (rdev->num_crtc >= 4) {
		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC2_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC3_REGISTER_OFFSET);
	}
	if (rdev->num_crtc >= 6) {
		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC4_REGISTER_OFFSET);
		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
			EVERGREEN_CRTC5_REGISTER_OFFSET);
	}

	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_CLEAR);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
			       GRPH_PFLIP_INT_CLEAR);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_RX_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}

/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}

/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}

/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}

/**
 * cik_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Get the IH ring buffer wptr from either the register
 * or the writeback memory buffer (CIK). Also check for
 * ring buffer overflow and deal with it.
 * Used by cik_irq_process().
 * Returns the value of the wptr.
 */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happens, start parsing interrupts
		 * from the last not-overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catch up.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
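/*
 * Worked example of the overflow recovery above, assuming (for
 * illustration) a 64 KB ring so rdev->ih.ptr_mask == 0xffff: if the
 * GPU wraps and reports wptr == 0x0020, the vector at 0x0020 is the
 * newest entry and everything older has been overwritten, so reading
 * resumes at (0x0020 + 16) & 0xffff == 0x0030, the oldest vector
 * still intact.
 */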

/* CIK IV Ring
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 *            CP:
 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
 *            ME_ID    - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
 *            PIPE_ID  - ME0: 0 = 3D
 *                     - ME1&2: compute dispatcher (4 pipes each)
 *            SDMA:
 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
 *            QUEUE_ID    - 0 = gfx, 1 = rlc0, 2 = rlc1
 * [79:72]  - VMID
 * [95:80]  - PASID
 * [127:96] - reserved
 */
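/*
 * Illustrative decode of the first three dwords of one IV entry per
 * the layout above (this mirrors what cik_irq_process() does; the
 * names are local to the example and "i" is the dword index of the
 * 16-byte entry, i.e. rptr / 4):
 *
 *	src_id   = le32_to_cpu(ih_ring[i + 0]) & 0xff;		[7:0]
 *	src_data = le32_to_cpu(ih_ring[i + 1]) & 0xfffffff;	[59:32]
 *	ring_id  = le32_to_cpu(ih_ring[i + 2]) & 0xff;		[71:64]
 */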
/**
 * cik_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Interrupt handler (CIK). Walk the IH ring,
 * ack interrupts and schedule work to handle
 * interrupt events.
 * Returns irq process return code.
 */
int cik_irq_process(struct radeon_device *rdev)
{
	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u8 me_id, pipe_id, queue_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_dp = false;
	bool queue_reset = false;
	u32 addr, status, mc_client;
	bool queue_thermal = false;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = cik_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	cik_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;

		src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[0]) {
					drm_handle_vblank(rdev->ddev, 0);
#ifdef __NetBSD__
					spin_lock(&rdev->irq.vblank_lock);
					rdev->pm.vblank_sync = true;
					DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
					spin_unlock(&rdev->irq.vblank_lock);
#else
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
#endif
				}
				if (atomic_read(&rdev->irq.pflip[0]))
					radeon_crtc_handle_vblank(rdev, 0);
				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D1 vblank\n");

				break;
			case 1: /* D1 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D1 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[1]) {
					drm_handle_vblank(rdev->ddev, 1);
#ifdef __NetBSD__
					spin_lock(&rdev->irq.vblank_lock);
					rdev->pm.vblank_sync = true;
					DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
					spin_unlock(&rdev->irq.vblank_lock);
#else
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
#endif
				}
				if (atomic_read(&rdev->irq.pflip[1]))
					radeon_crtc_handle_vblank(rdev, 1);
				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D2 vblank\n");

				break;
			case 1: /* D2 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D2 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[2]) {
					drm_handle_vblank(rdev->ddev, 2);
#ifdef __NetBSD__
					spin_lock(&rdev->irq.vblank_lock);
					rdev->pm.vblank_sync = true;
					DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
					spin_unlock(&rdev->irq.vblank_lock);
#else
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
#endif
				}
				if (atomic_read(&rdev->irq.pflip[2]))
					radeon_crtc_handle_vblank(rdev, 2);
				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D3 vblank\n");

				break;
			case 1: /* D3 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D3 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[3]) {
					drm_handle_vblank(rdev->ddev, 3);
#ifdef __NetBSD__
					spin_lock(&rdev->irq.vblank_lock);
					rdev->pm.vblank_sync = true;
					DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
					spin_unlock(&rdev->irq.vblank_lock);
#else
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
#endif
				}
				if (atomic_read(&rdev->irq.pflip[3]))
					radeon_crtc_handle_vblank(rdev, 3);
				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D4 vblank\n");

				break;
			case 1: /* D4 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D4 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[4]) {
					drm_handle_vblank(rdev->ddev, 4);
#ifdef __NetBSD__
					spin_lock(&rdev->irq.vblank_lock);
					rdev->pm.vblank_sync = true;
					DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
					spin_unlock(&rdev->irq.vblank_lock);
#else
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
#endif
				}
				if (atomic_read(&rdev->irq.pflip[4]))
					radeon_crtc_handle_vblank(rdev, 4);
				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D5 vblank\n");

				break;
			case 1: /* D5 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D5 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				if (rdev->irq.crtc_vblank_int[5]) {
					drm_handle_vblank(rdev->ddev, 5);
#ifdef __NetBSD__
					spin_lock(&rdev->irq.vblank_lock);
					rdev->pm.vblank_sync = true;
					DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
					spin_unlock(&rdev->irq.vblank_lock);
#else
					rdev->pm.vblank_sync = true;
					wake_up(&rdev->irq.vblank_queue);
#endif
				}
				if (atomic_read(&rdev->irq.pflip[5]))
					radeon_crtc_handle_vblank(rdev, 5);
				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
				DRM_DEBUG("IH: D6 vblank\n");

				break;
			case 1: /* D6 vline */
				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
				DRM_DEBUG("IH: D6 vline\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 8: /* D1 page flip */
		case 10: /* D2 page flip */
		case 12: /* D3 page flip */
		case 14: /* D4 page flip */
		case 16: /* D5 page flip */
		case 18: /* D6 page flip */
			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
			if (radeon_use_pflipirq > 0)
				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD1\n");

				break;
			case 1:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD2\n");

				break;
			case 2:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD3\n");

				break;
			case 3:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD4\n");

				break;
			case 4:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD5\n");

				break;
			case 5:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
				queue_hotplug = true;
				DRM_DEBUG("IH: HPD6\n");

				break;
			case 6:
				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 1\n");

				break;
			case 7:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 2\n");

				break;
			case 8:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 3\n");

				break;
			case 9:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 4\n");

				break;
			case 10:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 5\n");

				break;
			case 11:
				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");

				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
				queue_dp = true;
				DRM_DEBUG("IH: HPD_RX 6\n");

				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 96:
			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
			WREG32(SRBM_INT_ACK, 0x1);
			break;
		case 124: /* UVD */
			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
			break;
		case 146:
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			if (addr == 0x0 && status == 0x0)
				break;
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			cik_vm_decode_fault(rdev, status, addr, mc_client);
			break;
		case 167: /* VCE */
			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
			switch (src_data) {
			case 0:
				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
				break;
			default:
				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 176: /* GFX RB CP_INT */
		case 177: /* GFX IB CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
			case 2:
				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 184: /* CP Privileged reg access */
			DRM_ERROR("Illegal register access in command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				/* This results in a full GPU reset, but all we need to do is soft
				 * reset the CP for gfx
				 */
				queue_reset = true;
				break;
			case 1:
				/* XXX compute */
				queue_reset = true;
				break;
			case 2:
				/* XXX compute */
				queue_reset = true;
				break;
			}
			break;
		case 185: /* CP Privileged inst */
			DRM_ERROR("Illegal instruction in command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				/* This results in a full GPU reset, but all we need to do is soft
				 * reset the CP for gfx
				 */
				queue_reset = true;
				break;
			case 1:
				/* XXX compute */
				queue_reset = true;
				break;
			case 2:
				/* XXX compute */
				queue_reset = true;
				break;
			}
			break;
		case 224: /* SDMA trap event */
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x3) >> 0;
			queue_id = (ring_id & 0xc) >> 2;
			DRM_DEBUG("IH: SDMA trap\n");
			switch (me_id) {
			case 0:
				switch (queue_id) {
				case 0:
					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
					break;
				case 1:
					/* XXX compute */
					break;
				case 2:
					/* XXX compute */
					break;
				}
				break;
			case 1:
				switch (queue_id) {
				case 0:
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
					break;
				case 1:
					/* XXX compute */
					break;
				case 2:
					/* XXX compute */
					break;
				}
				break;
			}
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 241: /* SDMA Privileged inst */
		case 247: /* SDMA Privileged inst */
			DRM_ERROR("Illegal instruction in SDMA command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x3) >> 0;
			queue_id = (ring_id & 0xc) >> 2;
			switch (me_id) {
			case 0:
				switch (queue_id) {
				case 0:
					queue_reset = true;
					break;
				case 1:
					/* XXX compute */
					queue_reset = true;
					break;
				case 2:
					/* XXX compute */
					queue_reset = true;
					break;
				}
				break;
			case 1:
				switch (queue_id) {
				case 0:
					queue_reset = true;
					break;
				case 1:
					/* XXX compute */
					queue_reset = true;
					break;
				case 2:
					/* XXX compute */
					queue_reset = true;
					break;
				}
				break;
			}
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
		WREG32(IH_RB_RPTR, rptr);
	}
	if (queue_dp)
		schedule_work(&rdev->dp_work);
	if (queue_hotplug)
		schedule_delayed_work(&rdev->hotplug_work, 0);
	if (queue_reset) {
#ifdef __NetBSD__
		spin_lock(&rdev->fence_lock);
		rdev->needs_reset = true;
		radeon_fence_wakeup_locked(rdev);
		spin_unlock(&rdev->fence_lock);
#else
		rdev->needs_reset = true;
		wake_up_all(&rdev->fence_queue);
#endif
	}
	if (queue_thermal)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = cik_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
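/*
 * The RINGID decode used repeatedly above, shown on a concrete
 * value: ring_id == 0x2b (0b0101011) yields
 *
 *	me_id    = (0x2b & 0x60) >> 5 = 1	MEC 1
 *	pipe_id  = (0x2b & 0x18) >> 3 = 1	pipe 1
 *	queue_id = (0x2b & 0x07) >> 0 = 3	queue 3
 *
 * matching the ME_ID[1:0]/PIPE_ID[1:0]/QUEUE_ID[2:0] packing in the
 * IV ring description above (the "XXX check the bitfield order"
 * notes mark this layout as unverified).
 */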

/*
 * startup/shutdown callbacks
 */
static void cik_uvd_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_uvd)
		return;

	r = radeon_uvd_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
		/*
		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
		 * cik_uvd_start() fail early and do nothing, so it is
		 * pointless to try to go through that code.  Hence we
		 * disable uvd here.
		 */
		rdev->has_uvd = false;
		return;
	}
	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
}

static void cik_uvd_start(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_uvd)
		return;

	r = radeon_uvd_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
		goto error;
	}
	r = uvd_v4_2_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
		goto error;
	}
	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
		goto error;
	}
	return;

error:
	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
}

static void cik_uvd_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
		return;

	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
		return;
	}
	r = uvd_v1_0_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
		return;
	}
}

static void cik_vce_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_vce)
		return;

	r = radeon_vce_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
		/*
		 * At this point rdev->vce.vcpu_bo is NULL, which makes
		 * cik_vce_start() fail early and do nothing, so it is
		 * pointless to try to go through that code.  Hence we
		 * disable vce here.
		 */
		rdev->has_vce = false;
		return;
	}
	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
}

static void cik_vce_start(struct radeon_device *rdev)
{
	int r;

	if (!rdev->has_vce)
		return;

	r = radeon_vce_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
		goto error;
	}
	r = vce_v2_0_resume(rdev);
	if (r) {
		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
		goto error;
	}
	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
		goto error;
	}
	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
		goto error;
	}
	return;

error:
	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
}

static void cik_vce_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
		return;

	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
	if (r) {
		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
		return;
	}
	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
	if (r) {
		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
		return;
	}
	r = vce_v1_0_init(rdev);
	if (r) {
		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
		return;
	}
}
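/*
 * Note on the UVD/VCE helpers above: they follow a three-step split
 * that cik_startup() relies on.  *_init() allocates software state
 * once at load (and clears has_uvd/has_vce on failure), *_start()
 * brings the block out of reset and starts the fence driver (and
 * zeroes ring_size on failure), and *_resume() (re)initializes the
 * rings, bailing out harmlessly if an earlier step failed.
 */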

/**
 * cik_startup - program the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called by cik_init() and cik_resume().
 * Returns 0 for success, error for failure.
 */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 nop;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = round_up(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	cik_uvd_start(rdev);
	cik_vce_start(rdev);

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	if (rdev->family == CHIP_HAWAII) {
		if (rdev->new_fw)
			nop = PACKET3(PACKET3_NOP, 0x3FFF);
		else
			nop = RADEON_CP_PACKET2;
	} else {
		nop = PACKET3(PACKET3_NOP, 0x3FFF);
	}

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     nop);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     nop);
	if (r)
		return r;
	/* dGPUs only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	cik_uvd_resume(rdev);
	cik_vce_resume(rdev);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_audio_init(rdev);
	if (r)
		return r;

	return 0;
}

/**
 * cik_resume - resume the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called at resume.
 * Returns 0 for success, error for failure.
 */
int cik_resume(struct radeon_device *rdev)
{
	int r;

	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	cik_init_golden_registers(rdev);

	if (rdev->pm.pm_method == PM_METHOD_DPM)
		radeon_pm_resume(rdev);

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		DRM_ERROR("cik startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;
}

/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	radeon_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	if (rdev->has_vce)
		radeon_vce_suspend(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
8613
/* The plan is to move more initialization into this function and to
 * use helper functions so that radeon_device_init does little more
 * than call asic specific functions. This should also allow removing
 * a bunch of callbacks like vram_info.
 */
/**
 * cik_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Setup asic specific driver variables and program the hw
 * to a functional state (CIK).
 * Called at driver startup.
 * Returns 0 for success, errors for failure.
 */
int cik_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	cik_init_golden_registers(rdev);
	/* Initialize scratch registers */
	cik_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = cik_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	}

	/* Initialize power management */
	radeon_pm_init(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	cik_uvd_init(rdev);
	cik_vce_init(rdev);

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cik_cp_fini(rdev);
		cik_sdma_fini(rdev);
		cik_irq_fini(rdev);
		sumo_rlc_fini(rdev);
		cik_mec_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cik_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for CIK+.\n");
		return -EINVAL;
	}

	return 0;
}

/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}

void dce8_program_fmt(struct drm_encoder *encoder)
{
	struct drm_device *dev = encoder->dev;
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
	int bpc = 0;
	u32 tmp = 0;
	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;

	if (connector) {
		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
		bpc = radeon_get_monitor_bpc(connector);
		dither = radeon_connector->dither;
	}

	/* LVDS/eDP FMT is set up by atom */
	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
		return;

	/* not needed for analog */
	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
		return;

	if (bpc == 0)
		return;

	switch (bpc) {
	case 6:
		if (dither == RADEON_FMT_DITHER_ENABLE)
			/* XXX sort out optimal dither settings */
			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
		else
			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
		break;
	case 8:
		if (dither == RADEON_FMT_DITHER_ENABLE)
			/* XXX sort out optimal dither settings */
			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
				FMT_RGB_RANDOM_ENABLE |
				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
		else
			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
		break;
	case 10:
		if (dither == RADEON_FMT_DITHER_ENABLE)
			/* XXX sort out optimal dither settings */
			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
				FMT_RGB_RANDOM_ENABLE |
				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
		else
			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
		break;
	default:
		/* not needed */
		break;
	}

	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
}
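
/*
 * Editor's note (illustrative, not part of the driver): for an 8 bpc
 * panel with dithering enabled, the switch above ends up writing
 *
 *	tmp = FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
 *	      FMT_RGB_RANDOM_ENABLE |
 *	      FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1);
 *
 * i.e. the FMT block spatially dithers the deeper internal pipe
 * output down to 8 bits per component instead of simply truncating it.
 */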

/* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Set up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controller.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width. For display widths larger
	 * than 4096, you need to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (mode->crtc_hdisplay < 1920) {
			tmp = 1;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 2560) {
			tmp = 2;
			buffer_alloc = 2;
		} else if (mode->crtc_hdisplay < 4096) {
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		} else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
		}
	} else {
		tmp = 1;
		buffer_alloc = 0;
	}

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
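
/*
 * Editor's note (illustrative, not part of the driver): for a common
 * 1920x1080 mode, crtc_hdisplay is 1920, which falls in the
 * [1920, 2560) bucket above, so tmp = 2 and buffer_alloc = 2; the
 * function then returns 2560 * 2 = 5120 pixels of line buffer for
 * that head. A disabled head returns 0 and releases its DMIF
 * allocation for the others.
 */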

/**
 * cik_get_number_of_dram_channels - get the number of dram channels
 *
 * @rdev: radeon_device pointer
 *
 * Look up the number of video ram channels (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the number of dram channels
 */
static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
{
	u32 tmp = RREG32(MC_SHARED_CHMAP);

	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	case 3:
		return 8;
	case 4:
		return 3;
	case 5:
		return 6;
	case 6:
		return 10;
	case 7:
		return 12;
	case 8:
		return 16;
	}
}
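
/*
 * Editor's note (illustrative, not part of the driver): NOOFCHAN is an
 * enumeration rather than a plain count, hence the lookup table above;
 * field value 2, for instance, decodes to 4 dram channels. Assuming
 * 32-bit channels (see the "* 4" bytes-per-channel factor in
 * dce8_dram_bandwidth() below), that would correspond to a 128-bit
 * memory interface.
 */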

struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk; /* bandwidth per dram data pin in kHz */
	u32 sclk; /* engine clock in kHz */
	u32 disp_clk; /* display clock in kHz */
	u32 src_width; /* viewport width */
	u32 active_time; /* active display time in ns */
	u32 blank_time; /* blank time in ns */
	bool interlaced; /* mode is interlaced */
	fixed20_12 vsc; /* vertical scale ratio */
	u32 num_heads; /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size; /* line buffer allocated to pipe */
	u32 vtaps; /* vertical scaler taps */
};

/**
 * dce8_dram_bandwidth - get the dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the raw dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dram bandwidth in MBytes/s
 */
static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate raw DRAM Bandwidth */
	fixed20_12 dram_efficiency; /* 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	dram_efficiency.full = dfixed_const(7);
	dram_efficiency.full = dfixed_div(dram_efficiency, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);

	return dfixed_trunc(bandwidth);
}
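
/*
 * Editor's note, a worked example (illustrative numbers, not from the
 * source): with 4 dram channels and yclk = 1000000 kHz (a 1 GHz
 * effective memory clock), the fixed-point math above computes
 *
 *	(4 channels * 4 bytes) * (1000000 / 1000) MHz * 0.7
 *	= 16 * 1000 * 0.7 = 11200 MBytes/s
 *
 * of DRAM bandwidth, i.e. only the 0.7 efficiency-derated portion of
 * the raw figure is assumed reachable in practice.
 */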

/**
 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
 *
 * @wm: watermark calculation data
 *
 * Calculate the dram bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dram bandwidth for display in MBytes/s
 */
static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	/* Calculate DRAM Bandwidth and the part allocated to display. */
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_data_return_bandwidth - get the data return bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the data return bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the data return bandwidth in MBytes/s
 */
static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display Data return Bandwidth */
	fixed20_12 return_efficiency; /* 0.8 */
	fixed20_12 sclk, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(10);
	return_efficiency.full = dfixed_const(8);
	return_efficiency.full = dfixed_div(return_efficiency, a);
	a.full = dfixed_const(32);
	bandwidth.full = dfixed_mul(a, sclk);
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_dmif_request_bandwidth - get the dmif bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the dmif bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dmif bandwidth in MBytes/s
 */
static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, bandwidth;
	fixed20_12 a, b;

	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);
	a.full = dfixed_const(32);
	b.full = dfixed_mul(a, disp_clk);

	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}

/**
 * dce8_available_bandwidth - get the min available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the min available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the min available bandwidth in MBytes/s
 */
static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);

	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
}
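
/*
 * Editor's note, a worked example (illustrative numbers, not from the
 * source): continuing the 11200 MBytes/s DRAM figure above, with
 * sclk = 800000 kHz the data return path allows 32 * 800 * 0.8 =
 * 20480 MBytes/s, and with disp_clk = 600000 kHz the DMIF request
 * path allows 32 * 600 * 0.8 = 15360 MBytes/s. The available
 * bandwidth is the bottleneck of the three, here the 11200 MBytes/s
 * DRAM figure.
 */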

/**
 * dce8_average_bandwidth - get the average available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the average available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the average available bandwidth in MBytes/s
 */
static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display mode Average Bandwidth
	 * DisplayMode should contain the source and destination dimensions,
	 * timing, etc.
	 */
	fixed20_12 bpp;
	fixed20_12 line_time;
	fixed20_12 src_width;
	fixed20_12 bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
	line_time.full = dfixed_div(line_time, a);
	bpp.full = dfixed_const(wm->bytes_per_pixel);
	src_width.full = dfixed_const(wm->src_width);
	bandwidth.full = dfixed_mul(src_width, bpp);
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
	bandwidth.full = dfixed_div(bandwidth, line_time);

	return dfixed_trunc(bandwidth);
}
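
/*
 * Editor's note, a worked example (illustrative numbers, not from the
 * source): for a 1920-pixel-wide source at 4 bytes per pixel with no
 * vertical scaling (vsc = 1) and a 16000 ns line time, the average
 * bandwidth is (1920 * 4) bytes / 16 us = 480 MBytes/s per head,
 * which the checks further down compare against the per-head share of
 * the available and DRAM bandwidths.
 */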

/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);
	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
	tmp = min(dfixed_trunc(a), tmp);

	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);

	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);
}
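
/*
 * Editor's note, a worked example (illustrative numbers, not from the
 * source): with one head, available_bandwidth = 11200 MBytes/s and
 * disp_clk = 148500 kHz, the components above come to roughly
 *
 *	worst_chunk_return_time   = 4096000 / 11200   ~= 365 ns
 *	cursor_line_pair_return   = 512000 / 11200    ~= 45 ns
 *	dc_latency                = 40000000 / 148500 ~= 269 ns
 *	latency ~= 2000 + (2 * 365 + 1 * 45) + 269    ~= 3044 ns
 *
 * and the final watermark only grows beyond that if the line buffer
 * cannot be refilled within one active line (line_fill_time >
 * active_time).
 */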

/**
 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
 * average and available dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
		return true;
	else
		return false;
}

/**
 * dce8_average_bandwidth_vs_available_bandwidth - check
 * average and available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * available bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_available_bandwidth(wm) / wm->num_heads))
		return true;
	else
		return false;
}

/**
 * dce8_check_latency_hiding - check latency hiding
 *
 * @wm: watermark calculation data
 *
 * Check latency hiding (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
{
	u32 lb_partitions = wm->lb_size / wm->src_width;
	u32 line_time = wm->active_time + wm->blank_time;
	u32 latency_tolerant_lines;
	u32 latency_hiding;
	fixed20_12 a;

	a.full = dfixed_const(1);
	if (wm->vsc.full > a.full)
		latency_tolerant_lines = 1;
	else {
		if (lb_partitions <= (wm->vtaps + 1))
			latency_tolerant_lines = 1;
		else
			latency_tolerant_lines = 2;
	}

	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);

	if (dce8_latency_watermark(wm) <= latency_hiding)
		return true;
	else
		return false;
}
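
/*
 * Editor's note (illustrative, not part of the driver): continuing the
 * example above, a 5120-pixel line buffer over a 1920-pixel source
 * gives lb_partitions = 2; with vsc = 1 and vtaps = 1 that is <=
 * vtaps + 1, so only one latency-tolerant line is assumed and
 * latency_hiding = line_time + blank_time. The ~3044 ns watermark
 * fits comfortably under that for typical line times, so the mode
 * passes.
 */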

/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 active_time;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
					    (u32)mode->clock);
		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
					  (u32)mode->clock);
		line_time = min(line_time, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = active_time;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = active_time;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* Save number of lines the linebuffer leads before the scanout */
		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}

/**
 * dce8_bandwidth_update - program display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Calculate and program the display watermarks and line
 * buffer allocation (CIK).
 */
void dce8_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	if (!rdev->mode_info.mode_config_initialized)
		return;

	radeon_update_display_priority(rdev);

	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	for (i = 0; i < rdev->num_crtc; i++) {
		mode = &rdev->mode_info.crtcs[i]->base.mode;
		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
	}
}

/**
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (CIK).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	mutex_lock(&rdev->gpu_clock_mutex);
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}
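
/*
 * Editor's note (illustrative, not part of the driver): writing 1 to
 * RLC_CAPTURE_GPU_CLOCK_COUNT latches the free-running counter so the
 * two 32-bit halves read back belong to the same instant; the mutex
 * keeps a concurrent caller from re-latching between the LSB and MSB
 * reads. The 64-bit value is then just (MSB << 32) | LSB.
 */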

static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}

int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	int r = 0;

	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
	if (r)
		return r;

	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
	return r;
}

int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	int r, i;
	struct atom_clock_dividers dividers;
	u32 tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   ecclk, false, &dividers);
	if (r)
		return r;

	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	tmp = RREG32_SMC(CG_ECLK_CNTL);
	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(CG_ECLK_CNTL, tmp);

	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}

static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	enum pci_bus_speed speed_cap;
	u32 speed_cntl, current_data_rate;
	int i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	speed_cap = pcie_get_speed_cap(root);
	if (speed_cap == PCI_SPEED_UNKNOWN)
		return;

	if ((speed_cap != PCIE_SPEED_8_0GT) &&
	    (speed_cap != PCIE_SPEED_5_0GT))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (speed_cap == PCIE_SPEED_8_0GT) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (speed_cap == PCIE_SPEED_5_0GT) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
		return;

	if (speed_cap == PCIE_SPEED_8_0GT) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			pcie_capability_read_word(root, PCI_EXP_LNKCTL,
						  &bridge_cfg);
			pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
						  &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
						   tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			for (i = 0; i < 10; i++) {
				/* check status */
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_DEVSTA,
							  &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
							  &bridge_cfg);
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL,
							  &gpu_cfg);

				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
							  &bridge_cfg2);
				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL2,
							  &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				msleep(100);

				/* linkctl */
				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
							  &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pcie_capability_write_word(root, PCI_EXP_LNKCTL,
							   tmp16);

				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL,
							  &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pcie_capability_write_word(rdev->pdev,
							   PCI_EXP_LNKCTL,
							   tmp16);

				/* linkctl2 */
				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
							  &tmp16);
				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN);
				tmp16 |= (bridge_cfg2 &
					  (PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN));
				pcie_capability_write_word(root,
							   PCI_EXP_LNKCTL2,
							   tmp16);

				pcie_capability_read_word(rdev->pdev,
							  PCI_EXP_LNKCTL2,
							  &tmp16);
				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN);
				tmp16 |= (gpu_cfg2 &
					  (PCI_EXP_LNKCTL2_ENTER_COMP |
					   PCI_EXP_LNKCTL2_TX_MARGIN));
				pcie_capability_write_word(rdev->pdev,
							   PCI_EXP_LNKCTL2,
							   tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
	if (speed_cap == PCIE_SPEED_8_0GT)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
	else if (speed_cap == PCIE_SPEED_5_0GT)
		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
	else
		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
	pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
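
/*
 * Editor's note (illustrative, not part of the driver): the sequence
 * above follows the usual software-driven PCIe speed change: program
 * the target link speed in LNKCTL2 (gen1/2/3), force a software speed
 * change in PCIE_LC_SPEED_CNTL, kick LC_INITIATE_LINK_SPEED_CHANGE,
 * and then poll until the hardware clears the initiate bit to signal
 * that retraining has finished.
 */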

static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
#ifndef __NetBSD__ /* XXX radeon pcie */
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;
#endif

				clk_req_support = false;
#ifndef __NetBSD__ /* XXX radeon pcie */
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
#endif
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}