1 /*	$NetBSD: radeon_si.c,v 1.5 2022/10/17 03:05:32 mrg Exp $	*/
2 
3 /*
4  * Copyright 2011 Advanced Micro Devices, Inc.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Alex Deucher
25  */
26 
27 #include <sys/cdefs.h>
28 __KERNEL_RCSID(0, "$NetBSD: radeon_si.c,v 1.5 2022/10/17 03:05:32 mrg Exp $");
29 
30 #include <linux/firmware.h>
31 #include <linux/module.h>
32 #include <linux/pci.h>
33 #include <linux/slab.h>
34 
35 #include <drm/drm_vblank.h>
36 #include <drm/radeon_drm.h>
37 
38 #include "atom.h"
39 #include "clearstate_si.h"
40 #include "radeon.h"
41 #include "radeon_asic.h"
42 #include "radeon_audio.h"
43 #include "radeon_ucode.h"
44 #include "si_blit_shaders.h"
45 #include "sid.h"
46 
47 #include <linux/nbsd-namespace.h>
48 
49 
50 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
51 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
52 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
53 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
54 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
55 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
56 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
57 
58 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
59 MODULE_FIRMWARE("radeon/tahiti_me.bin");
60 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
61 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
62 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
63 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
64 
65 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
66 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
67 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
68 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
69 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
70 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
71 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
72 
73 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
74 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
75 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
76 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
77 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
78 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
79 MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
80 
81 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
82 MODULE_FIRMWARE("radeon/VERDE_me.bin");
83 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
84 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
85 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
86 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
87 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
88 
89 MODULE_FIRMWARE("radeon/verde_pfp.bin");
90 MODULE_FIRMWARE("radeon/verde_me.bin");
91 MODULE_FIRMWARE("radeon/verde_ce.bin");
92 MODULE_FIRMWARE("radeon/verde_mc.bin");
93 MODULE_FIRMWARE("radeon/verde_rlc.bin");
94 MODULE_FIRMWARE("radeon/verde_smc.bin");
95 MODULE_FIRMWARE("radeon/verde_k_smc.bin");
96 
97 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
98 MODULE_FIRMWARE("radeon/OLAND_me.bin");
99 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
100 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
101 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
102 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
103 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
104 
105 MODULE_FIRMWARE("radeon/oland_pfp.bin");
106 MODULE_FIRMWARE("radeon/oland_me.bin");
107 MODULE_FIRMWARE("radeon/oland_ce.bin");
108 MODULE_FIRMWARE("radeon/oland_mc.bin");
109 MODULE_FIRMWARE("radeon/oland_rlc.bin");
110 MODULE_FIRMWARE("radeon/oland_smc.bin");
111 MODULE_FIRMWARE("radeon/oland_k_smc.bin");
112 
113 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
114 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
115 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
116 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
117 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
118 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
119 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
120 
121 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
122 MODULE_FIRMWARE("radeon/hainan_me.bin");
123 MODULE_FIRMWARE("radeon/hainan_ce.bin");
124 MODULE_FIRMWARE("radeon/hainan_mc.bin");
125 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
126 MODULE_FIRMWARE("radeon/hainan_smc.bin");
127 MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
128 MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");
129 
130 MODULE_FIRMWARE("radeon/si58_mc.bin");
131 
132 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
133 static void si_pcie_gen3_enable(struct radeon_device *rdev);
134 static void si_program_aspm(struct radeon_device *rdev);
135 extern void sumo_rlc_fini(struct radeon_device *rdev);
136 extern int sumo_rlc_init(struct radeon_device *rdev);
137 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
138 extern void r600_ih_ring_fini(struct radeon_device *rdev);
139 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
140 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
141 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
142 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
143 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
144 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
145 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
146 					 bool enable);
147 static void si_init_pg(struct radeon_device *rdev);
148 static void si_init_cg(struct radeon_device *rdev);
149 static void si_fini_pg(struct radeon_device *rdev);
150 static void si_fini_cg(struct radeon_device *rdev);
151 static void si_rlc_stop(struct radeon_device *rdev);
152 
153 static const u32 crtc_offsets[] =
154 {
155 	EVERGREEN_CRTC0_REGISTER_OFFSET,
156 	EVERGREEN_CRTC1_REGISTER_OFFSET,
157 	EVERGREEN_CRTC2_REGISTER_OFFSET,
158 	EVERGREEN_CRTC3_REGISTER_OFFSET,
159 	EVERGREEN_CRTC4_REGISTER_OFFSET,
160 	EVERGREEN_CRTC5_REGISTER_OFFSET
161 };
162 
163 static const u32 si_disp_int_status[] =
164 {
165 	DISP_INTERRUPT_STATUS,
166 	DISP_INTERRUPT_STATUS_CONTINUE,
167 	DISP_INTERRUPT_STATUS_CONTINUE2,
168 	DISP_INTERRUPT_STATUS_CONTINUE3,
169 	DISP_INTERRUPT_STATUS_CONTINUE4,
170 	DISP_INTERRUPT_STATUS_CONTINUE5
171 };
172 
173 #define DC_HPDx_CONTROL(x)        (DC_HPD1_CONTROL     + (x * 0xc))
174 #define DC_HPDx_INT_CONTROL(x)    (DC_HPD1_INT_CONTROL + (x * 0xc))
175 #define DC_HPDx_INT_STATUS_REG(x) (DC_HPD1_INT_STATUS  + (x * 0xc))
176 
177 static const u32 verde_rlc_save_restore_register_list[] =
178 {
179 	(0x8000 << 16) | (0x98f4 >> 2),
180 	0x00000000,
181 	(0x8040 << 16) | (0x98f4 >> 2),
182 	0x00000000,
183 	(0x8000 << 16) | (0xe80 >> 2),
184 	0x00000000,
185 	(0x8040 << 16) | (0xe80 >> 2),
186 	0x00000000,
187 	(0x8000 << 16) | (0x89bc >> 2),
188 	0x00000000,
189 	(0x8040 << 16) | (0x89bc >> 2),
190 	0x00000000,
191 	(0x8000 << 16) | (0x8c1c >> 2),
192 	0x00000000,
193 	(0x8040 << 16) | (0x8c1c >> 2),
194 	0x00000000,
195 	(0x9c00 << 16) | (0x98f0 >> 2),
196 	0x00000000,
197 	(0x9c00 << 16) | (0xe7c >> 2),
198 	0x00000000,
199 	(0x8000 << 16) | (0x9148 >> 2),
200 	0x00000000,
201 	(0x8040 << 16) | (0x9148 >> 2),
202 	0x00000000,
203 	(0x9c00 << 16) | (0x9150 >> 2),
204 	0x00000000,
205 	(0x9c00 << 16) | (0x897c >> 2),
206 	0x00000000,
207 	(0x9c00 << 16) | (0x8d8c >> 2),
208 	0x00000000,
209 	(0x9c00 << 16) | (0xac54 >> 2),
210 	0X00000000,
211 	0x3,
212 	(0x9c00 << 16) | (0x98f8 >> 2),
213 	0x00000000,
214 	(0x9c00 << 16) | (0x9910 >> 2),
215 	0x00000000,
216 	(0x9c00 << 16) | (0x9914 >> 2),
217 	0x00000000,
218 	(0x9c00 << 16) | (0x9918 >> 2),
219 	0x00000000,
220 	(0x9c00 << 16) | (0x991c >> 2),
221 	0x00000000,
222 	(0x9c00 << 16) | (0x9920 >> 2),
223 	0x00000000,
224 	(0x9c00 << 16) | (0x9924 >> 2),
225 	0x00000000,
226 	(0x9c00 << 16) | (0x9928 >> 2),
227 	0x00000000,
228 	(0x9c00 << 16) | (0x992c >> 2),
229 	0x00000000,
230 	(0x9c00 << 16) | (0x9930 >> 2),
231 	0x00000000,
232 	(0x9c00 << 16) | (0x9934 >> 2),
233 	0x00000000,
234 	(0x9c00 << 16) | (0x9938 >> 2),
235 	0x00000000,
236 	(0x9c00 << 16) | (0x993c >> 2),
237 	0x00000000,
238 	(0x9c00 << 16) | (0x9940 >> 2),
239 	0x00000000,
240 	(0x9c00 << 16) | (0x9944 >> 2),
241 	0x00000000,
242 	(0x9c00 << 16) | (0x9948 >> 2),
243 	0x00000000,
244 	(0x9c00 << 16) | (0x994c >> 2),
245 	0x00000000,
246 	(0x9c00 << 16) | (0x9950 >> 2),
247 	0x00000000,
248 	(0x9c00 << 16) | (0x9954 >> 2),
249 	0x00000000,
250 	(0x9c00 << 16) | (0x9958 >> 2),
251 	0x00000000,
252 	(0x9c00 << 16) | (0x995c >> 2),
253 	0x00000000,
254 	(0x9c00 << 16) | (0x9960 >> 2),
255 	0x00000000,
256 	(0x9c00 << 16) | (0x9964 >> 2),
257 	0x00000000,
258 	(0x9c00 << 16) | (0x9968 >> 2),
259 	0x00000000,
260 	(0x9c00 << 16) | (0x996c >> 2),
261 	0x00000000,
262 	(0x9c00 << 16) | (0x9970 >> 2),
263 	0x00000000,
264 	(0x9c00 << 16) | (0x9974 >> 2),
265 	0x00000000,
266 	(0x9c00 << 16) | (0x9978 >> 2),
267 	0x00000000,
268 	(0x9c00 << 16) | (0x997c >> 2),
269 	0x00000000,
270 	(0x9c00 << 16) | (0x9980 >> 2),
271 	0x00000000,
272 	(0x9c00 << 16) | (0x9984 >> 2),
273 	0x00000000,
274 	(0x9c00 << 16) | (0x9988 >> 2),
275 	0x00000000,
276 	(0x9c00 << 16) | (0x998c >> 2),
277 	0x00000000,
278 	(0x9c00 << 16) | (0x8c00 >> 2),
279 	0x00000000,
280 	(0x9c00 << 16) | (0x8c14 >> 2),
281 	0x00000000,
282 	(0x9c00 << 16) | (0x8c04 >> 2),
283 	0x00000000,
284 	(0x9c00 << 16) | (0x8c08 >> 2),
285 	0x00000000,
286 	(0x8000 << 16) | (0x9b7c >> 2),
287 	0x00000000,
288 	(0x8040 << 16) | (0x9b7c >> 2),
289 	0x00000000,
290 	(0x8000 << 16) | (0xe84 >> 2),
291 	0x00000000,
292 	(0x8040 << 16) | (0xe84 >> 2),
293 	0x00000000,
294 	(0x8000 << 16) | (0x89c0 >> 2),
295 	0x00000000,
296 	(0x8040 << 16) | (0x89c0 >> 2),
297 	0x00000000,
298 	(0x8000 << 16) | (0x914c >> 2),
299 	0x00000000,
300 	(0x8040 << 16) | (0x914c >> 2),
301 	0x00000000,
302 	(0x8000 << 16) | (0x8c20 >> 2),
303 	0x00000000,
304 	(0x8040 << 16) | (0x8c20 >> 2),
305 	0x00000000,
306 	(0x8000 << 16) | (0x9354 >> 2),
307 	0x00000000,
308 	(0x8040 << 16) | (0x9354 >> 2),
309 	0x00000000,
310 	(0x9c00 << 16) | (0x9060 >> 2),
311 	0x00000000,
312 	(0x9c00 << 16) | (0x9364 >> 2),
313 	0x00000000,
314 	(0x9c00 << 16) | (0x9100 >> 2),
315 	0x00000000,
316 	(0x9c00 << 16) | (0x913c >> 2),
317 	0x00000000,
318 	(0x8000 << 16) | (0x90e0 >> 2),
319 	0x00000000,
320 	(0x8000 << 16) | (0x90e4 >> 2),
321 	0x00000000,
322 	(0x8000 << 16) | (0x90e8 >> 2),
323 	0x00000000,
324 	(0x8040 << 16) | (0x90e0 >> 2),
325 	0x00000000,
326 	(0x8040 << 16) | (0x90e4 >> 2),
327 	0x00000000,
328 	(0x8040 << 16) | (0x90e8 >> 2),
329 	0x00000000,
330 	(0x9c00 << 16) | (0x8bcc >> 2),
331 	0x00000000,
332 	(0x9c00 << 16) | (0x8b24 >> 2),
333 	0x00000000,
334 	(0x9c00 << 16) | (0x88c4 >> 2),
335 	0x00000000,
336 	(0x9c00 << 16) | (0x8e50 >> 2),
337 	0x00000000,
338 	(0x9c00 << 16) | (0x8c0c >> 2),
339 	0x00000000,
340 	(0x9c00 << 16) | (0x8e58 >> 2),
341 	0x00000000,
342 	(0x9c00 << 16) | (0x8e5c >> 2),
343 	0x00000000,
344 	(0x9c00 << 16) | (0x9508 >> 2),
345 	0x00000000,
346 	(0x9c00 << 16) | (0x950c >> 2),
347 	0x00000000,
348 	(0x9c00 << 16) | (0x9494 >> 2),
349 	0x00000000,
350 	(0x9c00 << 16) | (0xac0c >> 2),
351 	0x00000000,
352 	(0x9c00 << 16) | (0xac10 >> 2),
353 	0x00000000,
354 	(0x9c00 << 16) | (0xac14 >> 2),
355 	0x00000000,
356 	(0x9c00 << 16) | (0xae00 >> 2),
357 	0x00000000,
358 	(0x9c00 << 16) | (0xac08 >> 2),
359 	0x00000000,
360 	(0x9c00 << 16) | (0x88d4 >> 2),
361 	0x00000000,
362 	(0x9c00 << 16) | (0x88c8 >> 2),
363 	0x00000000,
364 	(0x9c00 << 16) | (0x88cc >> 2),
365 	0x00000000,
366 	(0x9c00 << 16) | (0x89b0 >> 2),
367 	0x00000000,
368 	(0x9c00 << 16) | (0x8b10 >> 2),
369 	0x00000000,
370 	(0x9c00 << 16) | (0x8a14 >> 2),
371 	0x00000000,
372 	(0x9c00 << 16) | (0x9830 >> 2),
373 	0x00000000,
374 	(0x9c00 << 16) | (0x9834 >> 2),
375 	0x00000000,
376 	(0x9c00 << 16) | (0x9838 >> 2),
377 	0x00000000,
378 	(0x9c00 << 16) | (0x9a10 >> 2),
379 	0x00000000,
380 	(0x8000 << 16) | (0x9870 >> 2),
381 	0x00000000,
382 	(0x8000 << 16) | (0x9874 >> 2),
383 	0x00000000,
384 	(0x8001 << 16) | (0x9870 >> 2),
385 	0x00000000,
386 	(0x8001 << 16) | (0x9874 >> 2),
387 	0x00000000,
388 	(0x8040 << 16) | (0x9870 >> 2),
389 	0x00000000,
390 	(0x8040 << 16) | (0x9874 >> 2),
391 	0x00000000,
392 	(0x8041 << 16) | (0x9870 >> 2),
393 	0x00000000,
394 	(0x8041 << 16) | (0x9874 >> 2),
395 	0x00000000,
396 	0x00000000
397 };
398 
399 static const u32 tahiti_golden_rlc_registers[] =
400 {
401 	0xc424, 0xffffffff, 0x00601005,
402 	0xc47c, 0xffffffff, 0x10104040,
403 	0xc488, 0xffffffff, 0x0100000a,
404 	0xc314, 0xffffffff, 0x00000800,
405 	0xc30c, 0xffffffff, 0x800000f4,
406 	0xf4a8, 0xffffffff, 0x00000000
407 };
408 
409 static const u32 tahiti_golden_registers[] =
410 {
411 	0x9a10, 0x00010000, 0x00018208,
412 	0x9830, 0xffffffff, 0x00000000,
413 	0x9834, 0xf00fffff, 0x00000400,
414 	0x9838, 0x0002021c, 0x00020200,
415 	0xc78, 0x00000080, 0x00000000,
416 	0xd030, 0x000300c0, 0x00800040,
417 	0xd830, 0x000300c0, 0x00800040,
418 	0x5bb0, 0x000000f0, 0x00000070,
419 	0x5bc0, 0x00200000, 0x50100000,
420 	0x7030, 0x31000311, 0x00000011,
421 	0x277c, 0x00000003, 0x000007ff,
422 	0x240c, 0x000007ff, 0x00000000,
423 	0x8a14, 0xf000001f, 0x00000007,
424 	0x8b24, 0xffffffff, 0x00ffffff,
425 	0x8b10, 0x0000ff0f, 0x00000000,
426 	0x28a4c, 0x07ffffff, 0x4e000000,
427 	0x28350, 0x3f3f3fff, 0x2a00126a,
428 	0x30, 0x000000ff, 0x0040,
429 	0x34, 0x00000040, 0x00004040,
430 	0x9100, 0x07ffffff, 0x03000000,
431 	0x8e88, 0x01ff1f3f, 0x00000000,
432 	0x8e84, 0x01ff1f3f, 0x00000000,
433 	0x9060, 0x0000007f, 0x00000020,
434 	0x9508, 0x00010000, 0x00010000,
435 	0xac14, 0x00000200, 0x000002fb,
436 	0xac10, 0xffffffff, 0x0000543b,
437 	0xac0c, 0xffffffff, 0xa9210876,
438 	0x88d0, 0xffffffff, 0x000fff40,
439 	0x88d4, 0x0000001f, 0x00000010,
440 	0x1410, 0x20000000, 0x20fffed8,
441 	0x15c0, 0x000c0fc0, 0x000c0400
442 };
443 
444 static const u32 tahiti_golden_registers2[] =
445 {
446 	0xc64, 0x00000001, 0x00000001
447 };
448 
449 static const u32 pitcairn_golden_rlc_registers[] =
450 {
451 	0xc424, 0xffffffff, 0x00601004,
452 	0xc47c, 0xffffffff, 0x10102020,
453 	0xc488, 0xffffffff, 0x01000020,
454 	0xc314, 0xffffffff, 0x00000800,
455 	0xc30c, 0xffffffff, 0x800000a4
456 };
457 
458 static const u32 pitcairn_golden_registers[] =
459 {
460 	0x9a10, 0x00010000, 0x00018208,
461 	0x9830, 0xffffffff, 0x00000000,
462 	0x9834, 0xf00fffff, 0x00000400,
463 	0x9838, 0x0002021c, 0x00020200,
464 	0xc78, 0x00000080, 0x00000000,
465 	0xd030, 0x000300c0, 0x00800040,
466 	0xd830, 0x000300c0, 0x00800040,
467 	0x5bb0, 0x000000f0, 0x00000070,
468 	0x5bc0, 0x00200000, 0x50100000,
469 	0x7030, 0x31000311, 0x00000011,
470 	0x2ae4, 0x00073ffe, 0x000022a2,
471 	0x240c, 0x000007ff, 0x00000000,
472 	0x8a14, 0xf000001f, 0x00000007,
473 	0x8b24, 0xffffffff, 0x00ffffff,
474 	0x8b10, 0x0000ff0f, 0x00000000,
475 	0x28a4c, 0x07ffffff, 0x4e000000,
476 	0x28350, 0x3f3f3fff, 0x2a00126a,
477 	0x30, 0x000000ff, 0x0040,
478 	0x34, 0x00000040, 0x00004040,
479 	0x9100, 0x07ffffff, 0x03000000,
480 	0x9060, 0x0000007f, 0x00000020,
481 	0x9508, 0x00010000, 0x00010000,
482 	0xac14, 0x000003ff, 0x000000f7,
483 	0xac10, 0xffffffff, 0x00000000,
484 	0xac0c, 0xffffffff, 0x32761054,
485 	0x88d4, 0x0000001f, 0x00000010,
486 	0x15c0, 0x000c0fc0, 0x000c0400
487 };
488 
489 static const u32 verde_golden_rlc_registers[] =
490 {
491 	0xc424, 0xffffffff, 0x033f1005,
492 	0xc47c, 0xffffffff, 0x10808020,
493 	0xc488, 0xffffffff, 0x00800008,
494 	0xc314, 0xffffffff, 0x00001000,
495 	0xc30c, 0xffffffff, 0x80010014
496 };
497 
498 static const u32 verde_golden_registers[] =
499 {
500 	0x9a10, 0x00010000, 0x00018208,
501 	0x9830, 0xffffffff, 0x00000000,
502 	0x9834, 0xf00fffff, 0x00000400,
503 	0x9838, 0x0002021c, 0x00020200,
504 	0xc78, 0x00000080, 0x00000000,
505 	0xd030, 0x000300c0, 0x00800040,
506 	0xd030, 0x000300c0, 0x00800040,
507 	0xd830, 0x000300c0, 0x00800040,
508 	0xd830, 0x000300c0, 0x00800040,
509 	0x5bb0, 0x000000f0, 0x00000070,
510 	0x5bc0, 0x00200000, 0x50100000,
511 	0x7030, 0x31000311, 0x00000011,
512 	0x2ae4, 0x00073ffe, 0x000022a2,
513 	0x2ae4, 0x00073ffe, 0x000022a2,
514 	0x2ae4, 0x00073ffe, 0x000022a2,
515 	0x240c, 0x000007ff, 0x00000000,
516 	0x240c, 0x000007ff, 0x00000000,
517 	0x240c, 0x000007ff, 0x00000000,
518 	0x8a14, 0xf000001f, 0x00000007,
519 	0x8a14, 0xf000001f, 0x00000007,
520 	0x8a14, 0xf000001f, 0x00000007,
521 	0x8b24, 0xffffffff, 0x00ffffff,
522 	0x8b10, 0x0000ff0f, 0x00000000,
523 	0x28a4c, 0x07ffffff, 0x4e000000,
524 	0x28350, 0x3f3f3fff, 0x0000124a,
525 	0x28350, 0x3f3f3fff, 0x0000124a,
526 	0x28350, 0x3f3f3fff, 0x0000124a,
527 	0x30, 0x000000ff, 0x0040,
528 	0x34, 0x00000040, 0x00004040,
529 	0x9100, 0x07ffffff, 0x03000000,
530 	0x9100, 0x07ffffff, 0x03000000,
531 	0x8e88, 0x01ff1f3f, 0x00000000,
532 	0x8e88, 0x01ff1f3f, 0x00000000,
533 	0x8e88, 0x01ff1f3f, 0x00000000,
534 	0x8e84, 0x01ff1f3f, 0x00000000,
535 	0x8e84, 0x01ff1f3f, 0x00000000,
536 	0x8e84, 0x01ff1f3f, 0x00000000,
537 	0x9060, 0x0000007f, 0x00000020,
538 	0x9508, 0x00010000, 0x00010000,
539 	0xac14, 0x000003ff, 0x00000003,
540 	0xac14, 0x000003ff, 0x00000003,
541 	0xac14, 0x000003ff, 0x00000003,
542 	0xac10, 0xffffffff, 0x00000000,
543 	0xac10, 0xffffffff, 0x00000000,
544 	0xac10, 0xffffffff, 0x00000000,
545 	0xac0c, 0xffffffff, 0x00001032,
546 	0xac0c, 0xffffffff, 0x00001032,
547 	0xac0c, 0xffffffff, 0x00001032,
548 	0x88d4, 0x0000001f, 0x00000010,
549 	0x88d4, 0x0000001f, 0x00000010,
550 	0x88d4, 0x0000001f, 0x00000010,
551 	0x15c0, 0x000c0fc0, 0x000c0400
552 };
553 
554 static const u32 oland_golden_rlc_registers[] =
555 {
556 	0xc424, 0xffffffff, 0x00601005,
557 	0xc47c, 0xffffffff, 0x10104040,
558 	0xc488, 0xffffffff, 0x0100000a,
559 	0xc314, 0xffffffff, 0x00000800,
560 	0xc30c, 0xffffffff, 0x800000f4
561 };
562 
563 static const u32 oland_golden_registers[] =
564 {
565 	0x9a10, 0x00010000, 0x00018208,
566 	0x9830, 0xffffffff, 0x00000000,
567 	0x9834, 0xf00fffff, 0x00000400,
568 	0x9838, 0x0002021c, 0x00020200,
569 	0xc78, 0x00000080, 0x00000000,
570 	0xd030, 0x000300c0, 0x00800040,
571 	0xd830, 0x000300c0, 0x00800040,
572 	0x5bb0, 0x000000f0, 0x00000070,
573 	0x5bc0, 0x00200000, 0x50100000,
574 	0x7030, 0x31000311, 0x00000011,
575 	0x2ae4, 0x00073ffe, 0x000022a2,
576 	0x240c, 0x000007ff, 0x00000000,
577 	0x8a14, 0xf000001f, 0x00000007,
578 	0x8b24, 0xffffffff, 0x00ffffff,
579 	0x8b10, 0x0000ff0f, 0x00000000,
580 	0x28a4c, 0x07ffffff, 0x4e000000,
581 	0x28350, 0x3f3f3fff, 0x00000082,
582 	0x30, 0x000000ff, 0x0040,
583 	0x34, 0x00000040, 0x00004040,
584 	0x9100, 0x07ffffff, 0x03000000,
585 	0x9060, 0x0000007f, 0x00000020,
586 	0x9508, 0x00010000, 0x00010000,
587 	0xac14, 0x000003ff, 0x000000f3,
588 	0xac10, 0xffffffff, 0x00000000,
589 	0xac0c, 0xffffffff, 0x00003210,
590 	0x88d4, 0x0000001f, 0x00000010,
591 	0x15c0, 0x000c0fc0, 0x000c0400
592 };
593 
594 static const u32 hainan_golden_registers[] =
595 {
596 	0x9a10, 0x00010000, 0x00018208,
597 	0x9830, 0xffffffff, 0x00000000,
598 	0x9834, 0xf00fffff, 0x00000400,
599 	0x9838, 0x0002021c, 0x00020200,
600 	0xd0c0, 0xff000fff, 0x00000100,
601 	0xd030, 0x000300c0, 0x00800040,
602 	0xd8c0, 0xff000fff, 0x00000100,
603 	0xd830, 0x000300c0, 0x00800040,
604 	0x2ae4, 0x00073ffe, 0x000022a2,
605 	0x240c, 0x000007ff, 0x00000000,
606 	0x8a14, 0xf000001f, 0x00000007,
607 	0x8b24, 0xffffffff, 0x00ffffff,
608 	0x8b10, 0x0000ff0f, 0x00000000,
609 	0x28a4c, 0x07ffffff, 0x4e000000,
610 	0x28350, 0x3f3f3fff, 0x00000000,
611 	0x30, 0x000000ff, 0x0040,
612 	0x34, 0x00000040, 0x00004040,
613 	0x9100, 0x03e00000, 0x03600000,
614 	0x9060, 0x0000007f, 0x00000020,
615 	0x9508, 0x00010000, 0x00010000,
616 	0xac14, 0x000003ff, 0x000000f1,
617 	0xac10, 0xffffffff, 0x00000000,
618 	0xac0c, 0xffffffff, 0x00003210,
619 	0x88d4, 0x0000001f, 0x00000010,
620 	0x15c0, 0x000c0fc0, 0x000c0400
621 };
622 
623 static const u32 hainan_golden_registers2[] =
624 {
625 	0x98f8, 0xffffffff, 0x02010001
626 };
627 
628 static const u32 tahiti_mgcg_cgcg_init[] =
629 {
630 	0xc400, 0xffffffff, 0xfffffffc,
631 	0x802c, 0xffffffff, 0xe0000000,
632 	0x9a60, 0xffffffff, 0x00000100,
633 	0x92a4, 0xffffffff, 0x00000100,
634 	0xc164, 0xffffffff, 0x00000100,
635 	0x9774, 0xffffffff, 0x00000100,
636 	0x8984, 0xffffffff, 0x06000100,
637 	0x8a18, 0xffffffff, 0x00000100,
638 	0x92a0, 0xffffffff, 0x00000100,
639 	0xc380, 0xffffffff, 0x00000100,
640 	0x8b28, 0xffffffff, 0x00000100,
641 	0x9144, 0xffffffff, 0x00000100,
642 	0x8d88, 0xffffffff, 0x00000100,
643 	0x8d8c, 0xffffffff, 0x00000100,
644 	0x9030, 0xffffffff, 0x00000100,
645 	0x9034, 0xffffffff, 0x00000100,
646 	0x9038, 0xffffffff, 0x00000100,
647 	0x903c, 0xffffffff, 0x00000100,
648 	0xad80, 0xffffffff, 0x00000100,
649 	0xac54, 0xffffffff, 0x00000100,
650 	0x897c, 0xffffffff, 0x06000100,
651 	0x9868, 0xffffffff, 0x00000100,
652 	0x9510, 0xffffffff, 0x00000100,
653 	0xaf04, 0xffffffff, 0x00000100,
654 	0xae04, 0xffffffff, 0x00000100,
655 	0x949c, 0xffffffff, 0x00000100,
656 	0x802c, 0xffffffff, 0xe0000000,
657 	0x9160, 0xffffffff, 0x00010000,
658 	0x9164, 0xffffffff, 0x00030002,
659 	0x9168, 0xffffffff, 0x00040007,
660 	0x916c, 0xffffffff, 0x00060005,
661 	0x9170, 0xffffffff, 0x00090008,
662 	0x9174, 0xffffffff, 0x00020001,
663 	0x9178, 0xffffffff, 0x00040003,
664 	0x917c, 0xffffffff, 0x00000007,
665 	0x9180, 0xffffffff, 0x00060005,
666 	0x9184, 0xffffffff, 0x00090008,
667 	0x9188, 0xffffffff, 0x00030002,
668 	0x918c, 0xffffffff, 0x00050004,
669 	0x9190, 0xffffffff, 0x00000008,
670 	0x9194, 0xffffffff, 0x00070006,
671 	0x9198, 0xffffffff, 0x000a0009,
672 	0x919c, 0xffffffff, 0x00040003,
673 	0x91a0, 0xffffffff, 0x00060005,
674 	0x91a4, 0xffffffff, 0x00000009,
675 	0x91a8, 0xffffffff, 0x00080007,
676 	0x91ac, 0xffffffff, 0x000b000a,
677 	0x91b0, 0xffffffff, 0x00050004,
678 	0x91b4, 0xffffffff, 0x00070006,
679 	0x91b8, 0xffffffff, 0x0008000b,
680 	0x91bc, 0xffffffff, 0x000a0009,
681 	0x91c0, 0xffffffff, 0x000d000c,
682 	0x91c4, 0xffffffff, 0x00060005,
683 	0x91c8, 0xffffffff, 0x00080007,
684 	0x91cc, 0xffffffff, 0x0000000b,
685 	0x91d0, 0xffffffff, 0x000a0009,
686 	0x91d4, 0xffffffff, 0x000d000c,
687 	0x91d8, 0xffffffff, 0x00070006,
688 	0x91dc, 0xffffffff, 0x00090008,
689 	0x91e0, 0xffffffff, 0x0000000c,
690 	0x91e4, 0xffffffff, 0x000b000a,
691 	0x91e8, 0xffffffff, 0x000e000d,
692 	0x91ec, 0xffffffff, 0x00080007,
693 	0x91f0, 0xffffffff, 0x000a0009,
694 	0x91f4, 0xffffffff, 0x0000000d,
695 	0x91f8, 0xffffffff, 0x000c000b,
696 	0x91fc, 0xffffffff, 0x000f000e,
697 	0x9200, 0xffffffff, 0x00090008,
698 	0x9204, 0xffffffff, 0x000b000a,
699 	0x9208, 0xffffffff, 0x000c000f,
700 	0x920c, 0xffffffff, 0x000e000d,
701 	0x9210, 0xffffffff, 0x00110010,
702 	0x9214, 0xffffffff, 0x000a0009,
703 	0x9218, 0xffffffff, 0x000c000b,
704 	0x921c, 0xffffffff, 0x0000000f,
705 	0x9220, 0xffffffff, 0x000e000d,
706 	0x9224, 0xffffffff, 0x00110010,
707 	0x9228, 0xffffffff, 0x000b000a,
708 	0x922c, 0xffffffff, 0x000d000c,
709 	0x9230, 0xffffffff, 0x00000010,
710 	0x9234, 0xffffffff, 0x000f000e,
711 	0x9238, 0xffffffff, 0x00120011,
712 	0x923c, 0xffffffff, 0x000c000b,
713 	0x9240, 0xffffffff, 0x000e000d,
714 	0x9244, 0xffffffff, 0x00000011,
715 	0x9248, 0xffffffff, 0x0010000f,
716 	0x924c, 0xffffffff, 0x00130012,
717 	0x9250, 0xffffffff, 0x000d000c,
718 	0x9254, 0xffffffff, 0x000f000e,
719 	0x9258, 0xffffffff, 0x00100013,
720 	0x925c, 0xffffffff, 0x00120011,
721 	0x9260, 0xffffffff, 0x00150014,
722 	0x9264, 0xffffffff, 0x000e000d,
723 	0x9268, 0xffffffff, 0x0010000f,
724 	0x926c, 0xffffffff, 0x00000013,
725 	0x9270, 0xffffffff, 0x00120011,
726 	0x9274, 0xffffffff, 0x00150014,
727 	0x9278, 0xffffffff, 0x000f000e,
728 	0x927c, 0xffffffff, 0x00110010,
729 	0x9280, 0xffffffff, 0x00000014,
730 	0x9284, 0xffffffff, 0x00130012,
731 	0x9288, 0xffffffff, 0x00160015,
732 	0x928c, 0xffffffff, 0x0010000f,
733 	0x9290, 0xffffffff, 0x00120011,
734 	0x9294, 0xffffffff, 0x00000015,
735 	0x9298, 0xffffffff, 0x00140013,
736 	0x929c, 0xffffffff, 0x00170016,
737 	0x9150, 0xffffffff, 0x96940200,
738 	0x8708, 0xffffffff, 0x00900100,
739 	0xc478, 0xffffffff, 0x00000080,
740 	0xc404, 0xffffffff, 0x0020003f,
741 	0x30, 0xffffffff, 0x0000001c,
742 	0x34, 0x000f0000, 0x000f0000,
743 	0x160c, 0xffffffff, 0x00000100,
744 	0x1024, 0xffffffff, 0x00000100,
745 	0x102c, 0x00000101, 0x00000000,
746 	0x20a8, 0xffffffff, 0x00000104,
747 	0x264c, 0x000c0000, 0x000c0000,
748 	0x2648, 0x000c0000, 0x000c0000,
749 	0x55e4, 0xff000fff, 0x00000100,
750 	0x55e8, 0x00000001, 0x00000001,
751 	0x2f50, 0x00000001, 0x00000001,
752 	0x30cc, 0xc0000fff, 0x00000104,
753 	0xc1e4, 0x00000001, 0x00000001,
754 	0xd0c0, 0xfffffff0, 0x00000100,
755 	0xd8c0, 0xfffffff0, 0x00000100
756 };
757 
758 static const u32 pitcairn_mgcg_cgcg_init[] =
759 {
760 	0xc400, 0xffffffff, 0xfffffffc,
761 	0x802c, 0xffffffff, 0xe0000000,
762 	0x9a60, 0xffffffff, 0x00000100,
763 	0x92a4, 0xffffffff, 0x00000100,
764 	0xc164, 0xffffffff, 0x00000100,
765 	0x9774, 0xffffffff, 0x00000100,
766 	0x8984, 0xffffffff, 0x06000100,
767 	0x8a18, 0xffffffff, 0x00000100,
768 	0x92a0, 0xffffffff, 0x00000100,
769 	0xc380, 0xffffffff, 0x00000100,
770 	0x8b28, 0xffffffff, 0x00000100,
771 	0x9144, 0xffffffff, 0x00000100,
772 	0x8d88, 0xffffffff, 0x00000100,
773 	0x8d8c, 0xffffffff, 0x00000100,
774 	0x9030, 0xffffffff, 0x00000100,
775 	0x9034, 0xffffffff, 0x00000100,
776 	0x9038, 0xffffffff, 0x00000100,
777 	0x903c, 0xffffffff, 0x00000100,
778 	0xad80, 0xffffffff, 0x00000100,
779 	0xac54, 0xffffffff, 0x00000100,
780 	0x897c, 0xffffffff, 0x06000100,
781 	0x9868, 0xffffffff, 0x00000100,
782 	0x9510, 0xffffffff, 0x00000100,
783 	0xaf04, 0xffffffff, 0x00000100,
784 	0xae04, 0xffffffff, 0x00000100,
785 	0x949c, 0xffffffff, 0x00000100,
786 	0x802c, 0xffffffff, 0xe0000000,
787 	0x9160, 0xffffffff, 0x00010000,
788 	0x9164, 0xffffffff, 0x00030002,
789 	0x9168, 0xffffffff, 0x00040007,
790 	0x916c, 0xffffffff, 0x00060005,
791 	0x9170, 0xffffffff, 0x00090008,
792 	0x9174, 0xffffffff, 0x00020001,
793 	0x9178, 0xffffffff, 0x00040003,
794 	0x917c, 0xffffffff, 0x00000007,
795 	0x9180, 0xffffffff, 0x00060005,
796 	0x9184, 0xffffffff, 0x00090008,
797 	0x9188, 0xffffffff, 0x00030002,
798 	0x918c, 0xffffffff, 0x00050004,
799 	0x9190, 0xffffffff, 0x00000008,
800 	0x9194, 0xffffffff, 0x00070006,
801 	0x9198, 0xffffffff, 0x000a0009,
802 	0x919c, 0xffffffff, 0x00040003,
803 	0x91a0, 0xffffffff, 0x00060005,
804 	0x91a4, 0xffffffff, 0x00000009,
805 	0x91a8, 0xffffffff, 0x00080007,
806 	0x91ac, 0xffffffff, 0x000b000a,
807 	0x91b0, 0xffffffff, 0x00050004,
808 	0x91b4, 0xffffffff, 0x00070006,
809 	0x91b8, 0xffffffff, 0x0008000b,
810 	0x91bc, 0xffffffff, 0x000a0009,
811 	0x91c0, 0xffffffff, 0x000d000c,
812 	0x9200, 0xffffffff, 0x00090008,
813 	0x9204, 0xffffffff, 0x000b000a,
814 	0x9208, 0xffffffff, 0x000c000f,
815 	0x920c, 0xffffffff, 0x000e000d,
816 	0x9210, 0xffffffff, 0x00110010,
817 	0x9214, 0xffffffff, 0x000a0009,
818 	0x9218, 0xffffffff, 0x000c000b,
819 	0x921c, 0xffffffff, 0x0000000f,
820 	0x9220, 0xffffffff, 0x000e000d,
821 	0x9224, 0xffffffff, 0x00110010,
822 	0x9228, 0xffffffff, 0x000b000a,
823 	0x922c, 0xffffffff, 0x000d000c,
824 	0x9230, 0xffffffff, 0x00000010,
825 	0x9234, 0xffffffff, 0x000f000e,
826 	0x9238, 0xffffffff, 0x00120011,
827 	0x923c, 0xffffffff, 0x000c000b,
828 	0x9240, 0xffffffff, 0x000e000d,
829 	0x9244, 0xffffffff, 0x00000011,
830 	0x9248, 0xffffffff, 0x0010000f,
831 	0x924c, 0xffffffff, 0x00130012,
832 	0x9250, 0xffffffff, 0x000d000c,
833 	0x9254, 0xffffffff, 0x000f000e,
834 	0x9258, 0xffffffff, 0x00100013,
835 	0x925c, 0xffffffff, 0x00120011,
836 	0x9260, 0xffffffff, 0x00150014,
837 	0x9150, 0xffffffff, 0x96940200,
838 	0x8708, 0xffffffff, 0x00900100,
839 	0xc478, 0xffffffff, 0x00000080,
840 	0xc404, 0xffffffff, 0x0020003f,
841 	0x30, 0xffffffff, 0x0000001c,
842 	0x34, 0x000f0000, 0x000f0000,
843 	0x160c, 0xffffffff, 0x00000100,
844 	0x1024, 0xffffffff, 0x00000100,
845 	0x102c, 0x00000101, 0x00000000,
846 	0x20a8, 0xffffffff, 0x00000104,
847 	0x55e4, 0xff000fff, 0x00000100,
848 	0x55e8, 0x00000001, 0x00000001,
849 	0x2f50, 0x00000001, 0x00000001,
850 	0x30cc, 0xc0000fff, 0x00000104,
851 	0xc1e4, 0x00000001, 0x00000001,
852 	0xd0c0, 0xfffffff0, 0x00000100,
853 	0xd8c0, 0xfffffff0, 0x00000100
854 };
855 
856 static const u32 verde_mgcg_cgcg_init[] =
857 {
858 	0xc400, 0xffffffff, 0xfffffffc,
859 	0x802c, 0xffffffff, 0xe0000000,
860 	0x9a60, 0xffffffff, 0x00000100,
861 	0x92a4, 0xffffffff, 0x00000100,
862 	0xc164, 0xffffffff, 0x00000100,
863 	0x9774, 0xffffffff, 0x00000100,
864 	0x8984, 0xffffffff, 0x06000100,
865 	0x8a18, 0xffffffff, 0x00000100,
866 	0x92a0, 0xffffffff, 0x00000100,
867 	0xc380, 0xffffffff, 0x00000100,
868 	0x8b28, 0xffffffff, 0x00000100,
869 	0x9144, 0xffffffff, 0x00000100,
870 	0x8d88, 0xffffffff, 0x00000100,
871 	0x8d8c, 0xffffffff, 0x00000100,
872 	0x9030, 0xffffffff, 0x00000100,
873 	0x9034, 0xffffffff, 0x00000100,
874 	0x9038, 0xffffffff, 0x00000100,
875 	0x903c, 0xffffffff, 0x00000100,
876 	0xad80, 0xffffffff, 0x00000100,
877 	0xac54, 0xffffffff, 0x00000100,
878 	0x897c, 0xffffffff, 0x06000100,
879 	0x9868, 0xffffffff, 0x00000100,
880 	0x9510, 0xffffffff, 0x00000100,
881 	0xaf04, 0xffffffff, 0x00000100,
882 	0xae04, 0xffffffff, 0x00000100,
883 	0x949c, 0xffffffff, 0x00000100,
884 	0x802c, 0xffffffff, 0xe0000000,
885 	0x9160, 0xffffffff, 0x00010000,
886 	0x9164, 0xffffffff, 0x00030002,
887 	0x9168, 0xffffffff, 0x00040007,
888 	0x916c, 0xffffffff, 0x00060005,
889 	0x9170, 0xffffffff, 0x00090008,
890 	0x9174, 0xffffffff, 0x00020001,
891 	0x9178, 0xffffffff, 0x00040003,
892 	0x917c, 0xffffffff, 0x00000007,
893 	0x9180, 0xffffffff, 0x00060005,
894 	0x9184, 0xffffffff, 0x00090008,
895 	0x9188, 0xffffffff, 0x00030002,
896 	0x918c, 0xffffffff, 0x00050004,
897 	0x9190, 0xffffffff, 0x00000008,
898 	0x9194, 0xffffffff, 0x00070006,
899 	0x9198, 0xffffffff, 0x000a0009,
900 	0x919c, 0xffffffff, 0x00040003,
901 	0x91a0, 0xffffffff, 0x00060005,
902 	0x91a4, 0xffffffff, 0x00000009,
903 	0x91a8, 0xffffffff, 0x00080007,
904 	0x91ac, 0xffffffff, 0x000b000a,
905 	0x91b0, 0xffffffff, 0x00050004,
906 	0x91b4, 0xffffffff, 0x00070006,
907 	0x91b8, 0xffffffff, 0x0008000b,
908 	0x91bc, 0xffffffff, 0x000a0009,
909 	0x91c0, 0xffffffff, 0x000d000c,
910 	0x9200, 0xffffffff, 0x00090008,
911 	0x9204, 0xffffffff, 0x000b000a,
912 	0x9208, 0xffffffff, 0x000c000f,
913 	0x920c, 0xffffffff, 0x000e000d,
914 	0x9210, 0xffffffff, 0x00110010,
915 	0x9214, 0xffffffff, 0x000a0009,
916 	0x9218, 0xffffffff, 0x000c000b,
917 	0x921c, 0xffffffff, 0x0000000f,
918 	0x9220, 0xffffffff, 0x000e000d,
919 	0x9224, 0xffffffff, 0x00110010,
920 	0x9228, 0xffffffff, 0x000b000a,
921 	0x922c, 0xffffffff, 0x000d000c,
922 	0x9230, 0xffffffff, 0x00000010,
923 	0x9234, 0xffffffff, 0x000f000e,
924 	0x9238, 0xffffffff, 0x00120011,
925 	0x923c, 0xffffffff, 0x000c000b,
926 	0x9240, 0xffffffff, 0x000e000d,
927 	0x9244, 0xffffffff, 0x00000011,
928 	0x9248, 0xffffffff, 0x0010000f,
929 	0x924c, 0xffffffff, 0x00130012,
930 	0x9250, 0xffffffff, 0x000d000c,
931 	0x9254, 0xffffffff, 0x000f000e,
932 	0x9258, 0xffffffff, 0x00100013,
933 	0x925c, 0xffffffff, 0x00120011,
934 	0x9260, 0xffffffff, 0x00150014,
935 	0x9150, 0xffffffff, 0x96940200,
936 	0x8708, 0xffffffff, 0x00900100,
937 	0xc478, 0xffffffff, 0x00000080,
938 	0xc404, 0xffffffff, 0x0020003f,
939 	0x30, 0xffffffff, 0x0000001c,
940 	0x34, 0x000f0000, 0x000f0000,
941 	0x160c, 0xffffffff, 0x00000100,
942 	0x1024, 0xffffffff, 0x00000100,
943 	0x102c, 0x00000101, 0x00000000,
944 	0x20a8, 0xffffffff, 0x00000104,
945 	0x264c, 0x000c0000, 0x000c0000,
946 	0x2648, 0x000c0000, 0x000c0000,
947 	0x55e4, 0xff000fff, 0x00000100,
948 	0x55e8, 0x00000001, 0x00000001,
949 	0x2f50, 0x00000001, 0x00000001,
950 	0x30cc, 0xc0000fff, 0x00000104,
951 	0xc1e4, 0x00000001, 0x00000001,
952 	0xd0c0, 0xfffffff0, 0x00000100,
953 	0xd8c0, 0xfffffff0, 0x00000100
954 };
955 
956 static const u32 oland_mgcg_cgcg_init[] =
957 {
958 	0xc400, 0xffffffff, 0xfffffffc,
959 	0x802c, 0xffffffff, 0xe0000000,
960 	0x9a60, 0xffffffff, 0x00000100,
961 	0x92a4, 0xffffffff, 0x00000100,
962 	0xc164, 0xffffffff, 0x00000100,
963 	0x9774, 0xffffffff, 0x00000100,
964 	0x8984, 0xffffffff, 0x06000100,
965 	0x8a18, 0xffffffff, 0x00000100,
966 	0x92a0, 0xffffffff, 0x00000100,
967 	0xc380, 0xffffffff, 0x00000100,
968 	0x8b28, 0xffffffff, 0x00000100,
969 	0x9144, 0xffffffff, 0x00000100,
970 	0x8d88, 0xffffffff, 0x00000100,
971 	0x8d8c, 0xffffffff, 0x00000100,
972 	0x9030, 0xffffffff, 0x00000100,
973 	0x9034, 0xffffffff, 0x00000100,
974 	0x9038, 0xffffffff, 0x00000100,
975 	0x903c, 0xffffffff, 0x00000100,
976 	0xad80, 0xffffffff, 0x00000100,
977 	0xac54, 0xffffffff, 0x00000100,
978 	0x897c, 0xffffffff, 0x06000100,
979 	0x9868, 0xffffffff, 0x00000100,
980 	0x9510, 0xffffffff, 0x00000100,
981 	0xaf04, 0xffffffff, 0x00000100,
982 	0xae04, 0xffffffff, 0x00000100,
983 	0x949c, 0xffffffff, 0x00000100,
984 	0x802c, 0xffffffff, 0xe0000000,
985 	0x9160, 0xffffffff, 0x00010000,
986 	0x9164, 0xffffffff, 0x00030002,
987 	0x9168, 0xffffffff, 0x00040007,
988 	0x916c, 0xffffffff, 0x00060005,
989 	0x9170, 0xffffffff, 0x00090008,
990 	0x9174, 0xffffffff, 0x00020001,
991 	0x9178, 0xffffffff, 0x00040003,
992 	0x917c, 0xffffffff, 0x00000007,
993 	0x9180, 0xffffffff, 0x00060005,
994 	0x9184, 0xffffffff, 0x00090008,
995 	0x9188, 0xffffffff, 0x00030002,
996 	0x918c, 0xffffffff, 0x00050004,
997 	0x9190, 0xffffffff, 0x00000008,
998 	0x9194, 0xffffffff, 0x00070006,
999 	0x9198, 0xffffffff, 0x000a0009,
1000 	0x919c, 0xffffffff, 0x00040003,
1001 	0x91a0, 0xffffffff, 0x00060005,
1002 	0x91a4, 0xffffffff, 0x00000009,
1003 	0x91a8, 0xffffffff, 0x00080007,
1004 	0x91ac, 0xffffffff, 0x000b000a,
1005 	0x91b0, 0xffffffff, 0x00050004,
1006 	0x91b4, 0xffffffff, 0x00070006,
1007 	0x91b8, 0xffffffff, 0x0008000b,
1008 	0x91bc, 0xffffffff, 0x000a0009,
1009 	0x91c0, 0xffffffff, 0x000d000c,
1010 	0x91c4, 0xffffffff, 0x00060005,
1011 	0x91c8, 0xffffffff, 0x00080007,
1012 	0x91cc, 0xffffffff, 0x0000000b,
1013 	0x91d0, 0xffffffff, 0x000a0009,
1014 	0x91d4, 0xffffffff, 0x000d000c,
1015 	0x9150, 0xffffffff, 0x96940200,
1016 	0x8708, 0xffffffff, 0x00900100,
1017 	0xc478, 0xffffffff, 0x00000080,
1018 	0xc404, 0xffffffff, 0x0020003f,
1019 	0x30, 0xffffffff, 0x0000001c,
1020 	0x34, 0x000f0000, 0x000f0000,
1021 	0x160c, 0xffffffff, 0x00000100,
1022 	0x1024, 0xffffffff, 0x00000100,
1023 	0x102c, 0x00000101, 0x00000000,
1024 	0x20a8, 0xffffffff, 0x00000104,
1025 	0x264c, 0x000c0000, 0x000c0000,
1026 	0x2648, 0x000c0000, 0x000c0000,
1027 	0x55e4, 0xff000fff, 0x00000100,
1028 	0x55e8, 0x00000001, 0x00000001,
1029 	0x2f50, 0x00000001, 0x00000001,
1030 	0x30cc, 0xc0000fff, 0x00000104,
1031 	0xc1e4, 0x00000001, 0x00000001,
1032 	0xd0c0, 0xfffffff0, 0x00000100,
1033 	0xd8c0, 0xfffffff0, 0x00000100
1034 };
1035 
1036 static const u32 hainan_mgcg_cgcg_init[] =
1037 {
1038 	0xc400, 0xffffffff, 0xfffffffc,
1039 	0x802c, 0xffffffff, 0xe0000000,
1040 	0x9a60, 0xffffffff, 0x00000100,
1041 	0x92a4, 0xffffffff, 0x00000100,
1042 	0xc164, 0xffffffff, 0x00000100,
1043 	0x9774, 0xffffffff, 0x00000100,
1044 	0x8984, 0xffffffff, 0x06000100,
1045 	0x8a18, 0xffffffff, 0x00000100,
1046 	0x92a0, 0xffffffff, 0x00000100,
1047 	0xc380, 0xffffffff, 0x00000100,
1048 	0x8b28, 0xffffffff, 0x00000100,
1049 	0x9144, 0xffffffff, 0x00000100,
1050 	0x8d88, 0xffffffff, 0x00000100,
1051 	0x8d8c, 0xffffffff, 0x00000100,
1052 	0x9030, 0xffffffff, 0x00000100,
1053 	0x9034, 0xffffffff, 0x00000100,
1054 	0x9038, 0xffffffff, 0x00000100,
1055 	0x903c, 0xffffffff, 0x00000100,
1056 	0xad80, 0xffffffff, 0x00000100,
1057 	0xac54, 0xffffffff, 0x00000100,
1058 	0x897c, 0xffffffff, 0x06000100,
1059 	0x9868, 0xffffffff, 0x00000100,
1060 	0x9510, 0xffffffff, 0x00000100,
1061 	0xaf04, 0xffffffff, 0x00000100,
1062 	0xae04, 0xffffffff, 0x00000100,
1063 	0x949c, 0xffffffff, 0x00000100,
1064 	0x802c, 0xffffffff, 0xe0000000,
1065 	0x9160, 0xffffffff, 0x00010000,
1066 	0x9164, 0xffffffff, 0x00030002,
1067 	0x9168, 0xffffffff, 0x00040007,
1068 	0x916c, 0xffffffff, 0x00060005,
1069 	0x9170, 0xffffffff, 0x00090008,
1070 	0x9174, 0xffffffff, 0x00020001,
1071 	0x9178, 0xffffffff, 0x00040003,
1072 	0x917c, 0xffffffff, 0x00000007,
1073 	0x9180, 0xffffffff, 0x00060005,
1074 	0x9184, 0xffffffff, 0x00090008,
1075 	0x9188, 0xffffffff, 0x00030002,
1076 	0x918c, 0xffffffff, 0x00050004,
1077 	0x9190, 0xffffffff, 0x00000008,
1078 	0x9194, 0xffffffff, 0x00070006,
1079 	0x9198, 0xffffffff, 0x000a0009,
1080 	0x919c, 0xffffffff, 0x00040003,
1081 	0x91a0, 0xffffffff, 0x00060005,
1082 	0x91a4, 0xffffffff, 0x00000009,
1083 	0x91a8, 0xffffffff, 0x00080007,
1084 	0x91ac, 0xffffffff, 0x000b000a,
1085 	0x91b0, 0xffffffff, 0x00050004,
1086 	0x91b4, 0xffffffff, 0x00070006,
1087 	0x91b8, 0xffffffff, 0x0008000b,
1088 	0x91bc, 0xffffffff, 0x000a0009,
1089 	0x91c0, 0xffffffff, 0x000d000c,
1090 	0x91c4, 0xffffffff, 0x00060005,
1091 	0x91c8, 0xffffffff, 0x00080007,
1092 	0x91cc, 0xffffffff, 0x0000000b,
1093 	0x91d0, 0xffffffff, 0x000a0009,
1094 	0x91d4, 0xffffffff, 0x000d000c,
1095 	0x9150, 0xffffffff, 0x96940200,
1096 	0x8708, 0xffffffff, 0x00900100,
1097 	0xc478, 0xffffffff, 0x00000080,
1098 	0xc404, 0xffffffff, 0x0020003f,
1099 	0x30, 0xffffffff, 0x0000001c,
1100 	0x34, 0x000f0000, 0x000f0000,
1101 	0x160c, 0xffffffff, 0x00000100,
1102 	0x1024, 0xffffffff, 0x00000100,
1103 	0x20a8, 0xffffffff, 0x00000104,
1104 	0x264c, 0x000c0000, 0x000c0000,
1105 	0x2648, 0x000c0000, 0x000c0000,
1106 	0x2f50, 0x00000001, 0x00000001,
1107 	0x30cc, 0xc0000fff, 0x00000104,
1108 	0xc1e4, 0x00000001, 0x00000001,
1109 	0xd0c0, 0xfffffff0, 0x00000100,
1110 	0xd8c0, 0xfffffff0, 0x00000100
1111 };
1112 
1113 static u32 verde_pg_init[] =
1114 {
1115 	0x353c, 0xffffffff, 0x40000,
1116 	0x3538, 0xffffffff, 0x200010ff,
1117 	0x353c, 0xffffffff, 0x0,
1118 	0x353c, 0xffffffff, 0x0,
1119 	0x353c, 0xffffffff, 0x0,
1120 	0x353c, 0xffffffff, 0x0,
1121 	0x353c, 0xffffffff, 0x0,
1122 	0x353c, 0xffffffff, 0x7007,
1123 	0x3538, 0xffffffff, 0x300010ff,
1124 	0x353c, 0xffffffff, 0x0,
1125 	0x353c, 0xffffffff, 0x0,
1126 	0x353c, 0xffffffff, 0x0,
1127 	0x353c, 0xffffffff, 0x0,
1128 	0x353c, 0xffffffff, 0x0,
1129 	0x353c, 0xffffffff, 0x400000,
1130 	0x3538, 0xffffffff, 0x100010ff,
1131 	0x353c, 0xffffffff, 0x0,
1132 	0x353c, 0xffffffff, 0x0,
1133 	0x353c, 0xffffffff, 0x0,
1134 	0x353c, 0xffffffff, 0x0,
1135 	0x353c, 0xffffffff, 0x0,
1136 	0x353c, 0xffffffff, 0x120200,
1137 	0x3538, 0xffffffff, 0x500010ff,
1138 	0x353c, 0xffffffff, 0x0,
1139 	0x353c, 0xffffffff, 0x0,
1140 	0x353c, 0xffffffff, 0x0,
1141 	0x353c, 0xffffffff, 0x0,
1142 	0x353c, 0xffffffff, 0x0,
1143 	0x353c, 0xffffffff, 0x1e1e16,
1144 	0x3538, 0xffffffff, 0x600010ff,
1145 	0x353c, 0xffffffff, 0x0,
1146 	0x353c, 0xffffffff, 0x0,
1147 	0x353c, 0xffffffff, 0x0,
1148 	0x353c, 0xffffffff, 0x0,
1149 	0x353c, 0xffffffff, 0x0,
1150 	0x353c, 0xffffffff, 0x171f1e,
1151 	0x3538, 0xffffffff, 0x700010ff,
1152 	0x353c, 0xffffffff, 0x0,
1153 	0x353c, 0xffffffff, 0x0,
1154 	0x353c, 0xffffffff, 0x0,
1155 	0x353c, 0xffffffff, 0x0,
1156 	0x353c, 0xffffffff, 0x0,
1157 	0x353c, 0xffffffff, 0x0,
1158 	0x3538, 0xffffffff, 0x9ff,
1159 	0x3500, 0xffffffff, 0x0,
1160 	0x3504, 0xffffffff, 0x10000800,
1161 	0x3504, 0xffffffff, 0xf,
1162 	0x3504, 0xffffffff, 0xf,
1163 	0x3500, 0xffffffff, 0x4,
1164 	0x3504, 0xffffffff, 0x1000051e,
1165 	0x3504, 0xffffffff, 0xffff,
1166 	0x3504, 0xffffffff, 0xffff,
1167 	0x3500, 0xffffffff, 0x8,
1168 	0x3504, 0xffffffff, 0x80500,
1169 	0x3500, 0xffffffff, 0x12,
1170 	0x3504, 0xffffffff, 0x9050c,
1171 	0x3500, 0xffffffff, 0x1d,
1172 	0x3504, 0xffffffff, 0xb052c,
1173 	0x3500, 0xffffffff, 0x2a,
1174 	0x3504, 0xffffffff, 0x1053e,
1175 	0x3500, 0xffffffff, 0x2d,
1176 	0x3504, 0xffffffff, 0x10546,
1177 	0x3500, 0xffffffff, 0x30,
1178 	0x3504, 0xffffffff, 0xa054e,
1179 	0x3500, 0xffffffff, 0x3c,
1180 	0x3504, 0xffffffff, 0x1055f,
1181 	0x3500, 0xffffffff, 0x3f,
1182 	0x3504, 0xffffffff, 0x10567,
1183 	0x3500, 0xffffffff, 0x42,
1184 	0x3504, 0xffffffff, 0x1056f,
1185 	0x3500, 0xffffffff, 0x45,
1186 	0x3504, 0xffffffff, 0x10572,
1187 	0x3500, 0xffffffff, 0x48,
1188 	0x3504, 0xffffffff, 0x20575,
1189 	0x3500, 0xffffffff, 0x4c,
1190 	0x3504, 0xffffffff, 0x190801,
1191 	0x3500, 0xffffffff, 0x67,
1192 	0x3504, 0xffffffff, 0x1082a,
1193 	0x3500, 0xffffffff, 0x6a,
1194 	0x3504, 0xffffffff, 0x1b082d,
1195 	0x3500, 0xffffffff, 0x87,
1196 	0x3504, 0xffffffff, 0x310851,
1197 	0x3500, 0xffffffff, 0xba,
1198 	0x3504, 0xffffffff, 0x891,
1199 	0x3500, 0xffffffff, 0xbc,
1200 	0x3504, 0xffffffff, 0x893,
1201 	0x3500, 0xffffffff, 0xbe,
1202 	0x3504, 0xffffffff, 0x20895,
1203 	0x3500, 0xffffffff, 0xc2,
1204 	0x3504, 0xffffffff, 0x20899,
1205 	0x3500, 0xffffffff, 0xc6,
1206 	0x3504, 0xffffffff, 0x2089d,
1207 	0x3500, 0xffffffff, 0xca,
1208 	0x3504, 0xffffffff, 0x8a1,
1209 	0x3500, 0xffffffff, 0xcc,
1210 	0x3504, 0xffffffff, 0x8a3,
1211 	0x3500, 0xffffffff, 0xce,
1212 	0x3504, 0xffffffff, 0x308a5,
1213 	0x3500, 0xffffffff, 0xd3,
1214 	0x3504, 0xffffffff, 0x6d08cd,
1215 	0x3500, 0xffffffff, 0x142,
1216 	0x3504, 0xffffffff, 0x2000095a,
1217 	0x3504, 0xffffffff, 0x1,
1218 	0x3500, 0xffffffff, 0x144,
1219 	0x3504, 0xffffffff, 0x301f095b,
1220 	0x3500, 0xffffffff, 0x165,
1221 	0x3504, 0xffffffff, 0xc094d,
1222 	0x3500, 0xffffffff, 0x173,
1223 	0x3504, 0xffffffff, 0xf096d,
1224 	0x3500, 0xffffffff, 0x184,
1225 	0x3504, 0xffffffff, 0x15097f,
1226 	0x3500, 0xffffffff, 0x19b,
1227 	0x3504, 0xffffffff, 0xc0998,
1228 	0x3500, 0xffffffff, 0x1a9,
1229 	0x3504, 0xffffffff, 0x409a7,
1230 	0x3500, 0xffffffff, 0x1af,
1231 	0x3504, 0xffffffff, 0xcdc,
1232 	0x3500, 0xffffffff, 0x1b1,
1233 	0x3504, 0xffffffff, 0x800,
1234 	0x3508, 0xffffffff, 0x6c9b2000,
1235 	0x3510, 0xfc00, 0x2000,
1236 	0x3544, 0xffffffff, 0xfc0,
1237 	0x28d4, 0x00000100, 0x100
1238 };
1239 
si_init_golden_registers(struct radeon_device * rdev)1240 static void si_init_golden_registers(struct radeon_device *rdev)
1241 {
1242 	switch (rdev->family) {
1243 	case CHIP_TAHITI:
1244 		radeon_program_register_sequence(rdev,
1245 						 tahiti_golden_registers,
1246 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1247 		radeon_program_register_sequence(rdev,
1248 						 tahiti_golden_rlc_registers,
1249 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1250 		radeon_program_register_sequence(rdev,
1251 						 tahiti_mgcg_cgcg_init,
1252 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1253 		radeon_program_register_sequence(rdev,
1254 						 tahiti_golden_registers2,
1255 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1256 		break;
1257 	case CHIP_PITCAIRN:
1258 		radeon_program_register_sequence(rdev,
1259 						 pitcairn_golden_registers,
1260 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1261 		radeon_program_register_sequence(rdev,
1262 						 pitcairn_golden_rlc_registers,
1263 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1264 		radeon_program_register_sequence(rdev,
1265 						 pitcairn_mgcg_cgcg_init,
1266 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1267 		break;
1268 	case CHIP_VERDE:
1269 		radeon_program_register_sequence(rdev,
1270 						 verde_golden_registers,
1271 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1272 		radeon_program_register_sequence(rdev,
1273 						 verde_golden_rlc_registers,
1274 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1275 		radeon_program_register_sequence(rdev,
1276 						 verde_mgcg_cgcg_init,
1277 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1278 		radeon_program_register_sequence(rdev,
1279 						 verde_pg_init,
1280 						 (const u32)ARRAY_SIZE(verde_pg_init));
1281 		break;
1282 	case CHIP_OLAND:
1283 		radeon_program_register_sequence(rdev,
1284 						 oland_golden_registers,
1285 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1286 		radeon_program_register_sequence(rdev,
1287 						 oland_golden_rlc_registers,
1288 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1289 		radeon_program_register_sequence(rdev,
1290 						 oland_mgcg_cgcg_init,
1291 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1292 		break;
1293 	case CHIP_HAINAN:
1294 		radeon_program_register_sequence(rdev,
1295 						 hainan_golden_registers,
1296 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1297 		radeon_program_register_sequence(rdev,
1298 						 hainan_golden_registers2,
1299 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1300 		radeon_program_register_sequence(rdev,
1301 						 hainan_mgcg_cgcg_init,
1302 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1303 		break;
1304 	default:
1305 		break;
1306 	}
1307 }
1308 
1309 /**
1310  * si_get_allowed_info_register - fetch the register for the info ioctl
1311  *
1312  * @rdev: radeon_device pointer
1313  * @reg: register offset in bytes
1314  * @val: register value
1315  *
1316  * Returns 0 for success or -EINVAL for an invalid register
1317  *
1318  */
si_get_allowed_info_register(struct radeon_device * rdev,u32 reg,u32 * val)1319 int si_get_allowed_info_register(struct radeon_device *rdev,
1320 				 u32 reg, u32 *val)
1321 {
1322 	switch (reg) {
1323 	case GRBM_STATUS:
1324 	case GRBM_STATUS2:
1325 	case GRBM_STATUS_SE0:
1326 	case GRBM_STATUS_SE1:
1327 	case SRBM_STATUS:
1328 	case SRBM_STATUS2:
1329 	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1330 	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1331 	case UVD_STATUS:
1332 		*val = RREG32(reg);
1333 		return 0;
1334 	default:
1335 		return -EINVAL;
1336 	}
1337 }
1338 
1339 #define PCIE_BUS_CLK                10000
1340 #define TCLK                        (PCIE_BUS_CLK / 10)
1341 
1342 /**
1343  * si_get_xclk - get the xclk
1344  *
1345  * @rdev: radeon_device pointer
1346  *
1347  * Returns the reference clock used by the gfx engine
1348  * (SI).
1349  */
si_get_xclk(struct radeon_device * rdev)1350 u32 si_get_xclk(struct radeon_device *rdev)
1351 {
1352 	u32 reference_clock = rdev->clock.spll.reference_freq;
1353 	u32 tmp;
1354 
1355 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1356 	if (tmp & MUX_TCLK_TO_XCLK)
1357 		return TCLK;
1358 
1359 	tmp = RREG32(CG_CLKPIN_CNTL);
1360 	if (tmp & XTALIN_DIVIDE)
1361 		return reference_clock / 4;
1362 
1363 	return reference_clock;
1364 }
1365 
1366 /* get temperature in millidegrees */
si_get_temp(struct radeon_device * rdev)1367 int si_get_temp(struct radeon_device *rdev)
1368 {
1369 	u32 temp;
1370 	int actual_temp = 0;
1371 
1372 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1373 		CTF_TEMP_SHIFT;
1374 
1375 	if (temp & 0x200)
1376 		actual_temp = 255;
1377 	else
1378 		actual_temp = temp & 0x1ff;
1379 
1380 	actual_temp = (actual_temp * 1000);
1381 
1382 	return actual_temp;
1383 }
1384 
1385 #define TAHITI_IO_MC_REGS_SIZE 36
1386 
1387 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1388 	{0x0000006f, 0x03044000},
1389 	{0x00000070, 0x0480c018},
1390 	{0x00000071, 0x00000040},
1391 	{0x00000072, 0x01000000},
1392 	{0x00000074, 0x000000ff},
1393 	{0x00000075, 0x00143400},
1394 	{0x00000076, 0x08ec0800},
1395 	{0x00000077, 0x040000cc},
1396 	{0x00000079, 0x00000000},
1397 	{0x0000007a, 0x21000409},
1398 	{0x0000007c, 0x00000000},
1399 	{0x0000007d, 0xe8000000},
1400 	{0x0000007e, 0x044408a8},
1401 	{0x0000007f, 0x00000003},
1402 	{0x00000080, 0x00000000},
1403 	{0x00000081, 0x01000000},
1404 	{0x00000082, 0x02000000},
1405 	{0x00000083, 0x00000000},
1406 	{0x00000084, 0xe3f3e4f4},
1407 	{0x00000085, 0x00052024},
1408 	{0x00000087, 0x00000000},
1409 	{0x00000088, 0x66036603},
1410 	{0x00000089, 0x01000000},
1411 	{0x0000008b, 0x1c0a0000},
1412 	{0x0000008c, 0xff010000},
1413 	{0x0000008e, 0xffffefff},
1414 	{0x0000008f, 0xfff3efff},
1415 	{0x00000090, 0xfff3efbf},
1416 	{0x00000094, 0x00101101},
1417 	{0x00000095, 0x00000fff},
1418 	{0x00000096, 0x00116fff},
1419 	{0x00000097, 0x60010000},
1420 	{0x00000098, 0x10010000},
1421 	{0x00000099, 0x00006000},
1422 	{0x0000009a, 0x00001000},
1423 	{0x0000009f, 0x00a77400}
1424 };
1425 
1426 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1427 	{0x0000006f, 0x03044000},
1428 	{0x00000070, 0x0480c018},
1429 	{0x00000071, 0x00000040},
1430 	{0x00000072, 0x01000000},
1431 	{0x00000074, 0x000000ff},
1432 	{0x00000075, 0x00143400},
1433 	{0x00000076, 0x08ec0800},
1434 	{0x00000077, 0x040000cc},
1435 	{0x00000079, 0x00000000},
1436 	{0x0000007a, 0x21000409},
1437 	{0x0000007c, 0x00000000},
1438 	{0x0000007d, 0xe8000000},
1439 	{0x0000007e, 0x044408a8},
1440 	{0x0000007f, 0x00000003},
1441 	{0x00000080, 0x00000000},
1442 	{0x00000081, 0x01000000},
1443 	{0x00000082, 0x02000000},
1444 	{0x00000083, 0x00000000},
1445 	{0x00000084, 0xe3f3e4f4},
1446 	{0x00000085, 0x00052024},
1447 	{0x00000087, 0x00000000},
1448 	{0x00000088, 0x66036603},
1449 	{0x00000089, 0x01000000},
1450 	{0x0000008b, 0x1c0a0000},
1451 	{0x0000008c, 0xff010000},
1452 	{0x0000008e, 0xffffefff},
1453 	{0x0000008f, 0xfff3efff},
1454 	{0x00000090, 0xfff3efbf},
1455 	{0x00000094, 0x00101101},
1456 	{0x00000095, 0x00000fff},
1457 	{0x00000096, 0x00116fff},
1458 	{0x00000097, 0x60010000},
1459 	{0x00000098, 0x10010000},
1460 	{0x00000099, 0x00006000},
1461 	{0x0000009a, 0x00001000},
1462 	{0x0000009f, 0x00a47400}
1463 };
1464 
1465 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1466 	{0x0000006f, 0x03044000},
1467 	{0x00000070, 0x0480c018},
1468 	{0x00000071, 0x00000040},
1469 	{0x00000072, 0x01000000},
1470 	{0x00000074, 0x000000ff},
1471 	{0x00000075, 0x00143400},
1472 	{0x00000076, 0x08ec0800},
1473 	{0x00000077, 0x040000cc},
1474 	{0x00000079, 0x00000000},
1475 	{0x0000007a, 0x21000409},
1476 	{0x0000007c, 0x00000000},
1477 	{0x0000007d, 0xe8000000},
1478 	{0x0000007e, 0x044408a8},
1479 	{0x0000007f, 0x00000003},
1480 	{0x00000080, 0x00000000},
1481 	{0x00000081, 0x01000000},
1482 	{0x00000082, 0x02000000},
1483 	{0x00000083, 0x00000000},
1484 	{0x00000084, 0xe3f3e4f4},
1485 	{0x00000085, 0x00052024},
1486 	{0x00000087, 0x00000000},
1487 	{0x00000088, 0x66036603},
1488 	{0x00000089, 0x01000000},
1489 	{0x0000008b, 0x1c0a0000},
1490 	{0x0000008c, 0xff010000},
1491 	{0x0000008e, 0xffffefff},
1492 	{0x0000008f, 0xfff3efff},
1493 	{0x00000090, 0xfff3efbf},
1494 	{0x00000094, 0x00101101},
1495 	{0x00000095, 0x00000fff},
1496 	{0x00000096, 0x00116fff},
1497 	{0x00000097, 0x60010000},
1498 	{0x00000098, 0x10010000},
1499 	{0x00000099, 0x00006000},
1500 	{0x0000009a, 0x00001000},
1501 	{0x0000009f, 0x00a37400}
1502 };
1503 
1504 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1505 	{0x0000006f, 0x03044000},
1506 	{0x00000070, 0x0480c018},
1507 	{0x00000071, 0x00000040},
1508 	{0x00000072, 0x01000000},
1509 	{0x00000074, 0x000000ff},
1510 	{0x00000075, 0x00143400},
1511 	{0x00000076, 0x08ec0800},
1512 	{0x00000077, 0x040000cc},
1513 	{0x00000079, 0x00000000},
1514 	{0x0000007a, 0x21000409},
1515 	{0x0000007c, 0x00000000},
1516 	{0x0000007d, 0xe8000000},
1517 	{0x0000007e, 0x044408a8},
1518 	{0x0000007f, 0x00000003},
1519 	{0x00000080, 0x00000000},
1520 	{0x00000081, 0x01000000},
1521 	{0x00000082, 0x02000000},
1522 	{0x00000083, 0x00000000},
1523 	{0x00000084, 0xe3f3e4f4},
1524 	{0x00000085, 0x00052024},
1525 	{0x00000087, 0x00000000},
1526 	{0x00000088, 0x66036603},
1527 	{0x00000089, 0x01000000},
1528 	{0x0000008b, 0x1c0a0000},
1529 	{0x0000008c, 0xff010000},
1530 	{0x0000008e, 0xffffefff},
1531 	{0x0000008f, 0xfff3efff},
1532 	{0x00000090, 0xfff3efbf},
1533 	{0x00000094, 0x00101101},
1534 	{0x00000095, 0x00000fff},
1535 	{0x00000096, 0x00116fff},
1536 	{0x00000097, 0x60010000},
1537 	{0x00000098, 0x10010000},
1538 	{0x00000099, 0x00006000},
1539 	{0x0000009a, 0x00001000},
1540 	{0x0000009f, 0x00a17730}
1541 };
1542 
1543 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1544 	{0x0000006f, 0x03044000},
1545 	{0x00000070, 0x0480c018},
1546 	{0x00000071, 0x00000040},
1547 	{0x00000072, 0x01000000},
1548 	{0x00000074, 0x000000ff},
1549 	{0x00000075, 0x00143400},
1550 	{0x00000076, 0x08ec0800},
1551 	{0x00000077, 0x040000cc},
1552 	{0x00000079, 0x00000000},
1553 	{0x0000007a, 0x21000409},
1554 	{0x0000007c, 0x00000000},
1555 	{0x0000007d, 0xe8000000},
1556 	{0x0000007e, 0x044408a8},
1557 	{0x0000007f, 0x00000003},
1558 	{0x00000080, 0x00000000},
1559 	{0x00000081, 0x01000000},
1560 	{0x00000082, 0x02000000},
1561 	{0x00000083, 0x00000000},
1562 	{0x00000084, 0xe3f3e4f4},
1563 	{0x00000085, 0x00052024},
1564 	{0x00000087, 0x00000000},
1565 	{0x00000088, 0x66036603},
1566 	{0x00000089, 0x01000000},
1567 	{0x0000008b, 0x1c0a0000},
1568 	{0x0000008c, 0xff010000},
1569 	{0x0000008e, 0xffffefff},
1570 	{0x0000008f, 0xfff3efff},
1571 	{0x00000090, 0xfff3efbf},
1572 	{0x00000094, 0x00101101},
1573 	{0x00000095, 0x00000fff},
1574 	{0x00000096, 0x00116fff},
1575 	{0x00000097, 0x60010000},
1576 	{0x00000098, 0x10010000},
1577 	{0x00000099, 0x00006000},
1578 	{0x0000009a, 0x00001000},
1579 	{0x0000009f, 0x00a07730}
1580 };
1581 
1582 /* ucode loading */
si_mc_load_microcode(struct radeon_device * rdev)1583 int si_mc_load_microcode(struct radeon_device *rdev)
1584 {
1585 	const __be32 *fw_data = NULL;
1586 	const __le32 *new_fw_data = NULL;
1587 	u32 running;
1588 	const u32 *io_mc_regs = NULL;
1589 	const __le32 *new_io_mc_regs = NULL;
1590 	int i, regs_size, ucode_size;
1591 
1592 	if (!rdev->mc_fw)
1593 		return -EINVAL;
1594 
1595 	if (rdev->new_fw) {
1596 		const struct mc_firmware_header_v1_0 *hdr =
1597 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1598 
1599 		radeon_ucode_print_mc_hdr(&hdr->header);
1600 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1601 		new_io_mc_regs = (const __le32 *)
1602 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1603 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1604 		new_fw_data = (const __le32 *)
1605 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1606 	} else {
1607 		ucode_size = rdev->mc_fw->size / 4;
1608 
1609 		switch (rdev->family) {
1610 		case CHIP_TAHITI:
1611 			io_mc_regs = &tahiti_io_mc_regs[0][0];
1612 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1613 			break;
1614 		case CHIP_PITCAIRN:
1615 			io_mc_regs = &pitcairn_io_mc_regs[0][0];
1616 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1617 			break;
1618 		case CHIP_VERDE:
1619 		default:
1620 			io_mc_regs = &verde_io_mc_regs[0][0];
1621 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1622 			break;
1623 		case CHIP_OLAND:
1624 			io_mc_regs = &oland_io_mc_regs[0][0];
1625 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1626 			break;
1627 		case CHIP_HAINAN:
1628 			io_mc_regs = &hainan_io_mc_regs[0][0];
1629 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1630 			break;
1631 		}
1632 		fw_data = (const __be32 *)rdev->mc_fw->data;
1633 	}
1634 
1635 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1636 
1637 	if (running == 0) {
1638 		/* reset the engine and set to writable */
1639 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1640 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1641 
1642 		/* load mc io regs */
1643 		for (i = 0; i < regs_size; i++) {
1644 			if (rdev->new_fw) {
1645 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1646 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1647 			} else {
1648 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1649 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1650 			}
1651 		}
1652 		/* load the MC ucode */
1653 		for (i = 0; i < ucode_size; i++) {
1654 			if (rdev->new_fw)
1655 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1656 			else
1657 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1658 		}
1659 
1660 		/* put the engine back into the active state */
1661 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1662 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1663 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1664 
1665 		/* wait for training to complete */
1666 		for (i = 0; i < rdev->usec_timeout; i++) {
1667 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1668 				break;
1669 			udelay(1);
1670 		}
1671 		for (i = 0; i < rdev->usec_timeout; i++) {
1672 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1673 				break;
1674 			udelay(1);
1675 		}
1676 	}
1677 
1678 	return 0;
1679 }
1680 
si_init_microcode(struct radeon_device * rdev)1681 static int si_init_microcode(struct radeon_device *rdev)
1682 {
1683 	const char *chip_name;
1684 	const char *new_chip_name;
1685 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1686 	size_t smc_req_size, mc2_req_size;
1687 	char fw_name[30];
1688 	int err;
1689 	int new_fw = 0;
1690 	bool new_smc = false;
1691 	bool si58_fw = false;
1692 	bool banks2_fw = false;
1693 
1694 	DRM_DEBUG("\n");
1695 
1696 	switch (rdev->family) {
1697 	case CHIP_TAHITI:
1698 		chip_name = "TAHITI";
1699 		new_chip_name = "tahiti";
1700 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1701 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1702 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1703 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1704 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1705 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1706 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1707 		break;
1708 	case CHIP_PITCAIRN:
1709 		chip_name = "PITCAIRN";
1710 		if ((rdev->pdev->revision == 0x81) &&
1711 		    ((rdev->pdev->device == 0x6810) ||
1712 		     (rdev->pdev->device == 0x6811)))
1713 			new_smc = true;
1714 		new_chip_name = "pitcairn";
1715 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1716 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1717 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1718 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1719 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1720 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1721 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1722 		break;
1723 	case CHIP_VERDE:
1724 		chip_name = "VERDE";
1725 		if (((rdev->pdev->device == 0x6820) &&
1726 		     ((rdev->pdev->revision == 0x81) ||
1727 		      (rdev->pdev->revision == 0x83))) ||
1728 		    ((rdev->pdev->device == 0x6821) &&
1729 		     ((rdev->pdev->revision == 0x83) ||
1730 		      (rdev->pdev->revision == 0x87))) ||
1731 		    ((rdev->pdev->revision == 0x87) &&
1732 		     ((rdev->pdev->device == 0x6823) ||
1733 		      (rdev->pdev->device == 0x682b))))
1734 			new_smc = true;
1735 		new_chip_name = "verde";
1736 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1737 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1738 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1739 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1740 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1741 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1742 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1743 		break;
1744 	case CHIP_OLAND:
1745 		chip_name = "OLAND";
1746 		if (((rdev->pdev->revision == 0x81) &&
1747 		     ((rdev->pdev->device == 0x6600) ||
1748 		      (rdev->pdev->device == 0x6604) ||
1749 		      (rdev->pdev->device == 0x6605) ||
1750 		      (rdev->pdev->device == 0x6610))) ||
1751 		    ((rdev->pdev->revision == 0x83) &&
1752 		     (rdev->pdev->device == 0x6610)))
1753 			new_smc = true;
1754 		new_chip_name = "oland";
1755 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1756 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1757 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1758 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1759 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1760 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1761 		break;
1762 	case CHIP_HAINAN:
1763 		chip_name = "HAINAN";
1764 		if (((rdev->pdev->revision == 0x81) &&
1765 		     (rdev->pdev->device == 0x6660)) ||
1766 		    ((rdev->pdev->revision == 0x83) &&
1767 		     ((rdev->pdev->device == 0x6660) ||
1768 		      (rdev->pdev->device == 0x6663) ||
1769 		      (rdev->pdev->device == 0x6665) ||
1770 		      (rdev->pdev->device == 0x6667))))
1771 			new_smc = true;
1772 		else if ((rdev->pdev->revision == 0xc3) &&
1773 			 (rdev->pdev->device == 0x6665))
1774 			banks2_fw = true;
1775 		new_chip_name = "hainan";
1776 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1777 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1778 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1779 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1780 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1781 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1782 		break;
1783 	default: BUG();
1784 	}
1785 
1786 	/* this memory configuration requires special firmware */
1787 	if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1788 		si58_fw = true;
1789 
1790 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1791 
1792 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1793 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1794 	if (err) {
1795 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1796 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1797 		if (err)
1798 			goto out;
1799 		if (rdev->pfp_fw->size != pfp_req_size) {
1800 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1801 			       rdev->pfp_fw->size, fw_name);
1802 			err = -EINVAL;
1803 			goto out;
1804 		}
1805 	} else {
1806 		err = radeon_ucode_validate(rdev->pfp_fw);
1807 		if (err) {
1808 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1809 			       fw_name);
1810 			goto out;
1811 		} else {
1812 			new_fw++;
1813 		}
1814 	}
1815 
1816 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1817 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1818 	if (err) {
1819 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1820 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1821 		if (err)
1822 			goto out;
1823 		if (rdev->me_fw->size != me_req_size) {
1824 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1825 			       rdev->me_fw->size, fw_name);
1826 			err = -EINVAL;
1827 		}
1828 	} else {
1829 		err = radeon_ucode_validate(rdev->me_fw);
1830 		if (err) {
1831 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1832 			       fw_name);
1833 			goto out;
1834 		} else {
1835 			new_fw++;
1836 		}
1837 	}
1838 
1839 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1840 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1841 	if (err) {
1842 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1843 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1844 		if (err)
1845 			goto out;
1846 		if (rdev->ce_fw->size != ce_req_size) {
1847 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1848 			       rdev->ce_fw->size, fw_name);
1849 			err = -EINVAL;
1850 		}
1851 	} else {
1852 		err = radeon_ucode_validate(rdev->ce_fw);
1853 		if (err) {
1854 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1855 			       fw_name);
1856 			goto out;
1857 		} else {
1858 			new_fw++;
1859 		}
1860 	}
1861 
1862 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1863 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1864 	if (err) {
1865 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1866 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1867 		if (err)
1868 			goto out;
1869 		if (rdev->rlc_fw->size != rlc_req_size) {
1870 			pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1871 			       rdev->rlc_fw->size, fw_name);
1872 			err = -EINVAL;
1873 		}
1874 	} else {
1875 		err = radeon_ucode_validate(rdev->rlc_fw);
1876 		if (err) {
1877 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1878 			       fw_name);
1879 			goto out;
1880 		} else {
1881 			new_fw++;
1882 		}
1883 	}
1884 
1885 	if (si58_fw)
1886 		snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
1887 	else
1888 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1889 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1890 	if (err) {
1891 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1892 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1893 		if (err) {
1894 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1895 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1896 			if (err)
1897 				goto out;
1898 		}
1899 		if ((rdev->mc_fw->size != mc_req_size) &&
1900 		    (rdev->mc_fw->size != mc2_req_size)) {
1901 			pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1902 			       rdev->mc_fw->size, fw_name);
1903 			err = -EINVAL;
1904 		}
1905 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1906 	} else {
1907 		err = radeon_ucode_validate(rdev->mc_fw);
1908 		if (err) {
1909 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1910 			       fw_name);
1911 			goto out;
1912 		} else {
1913 			new_fw++;
1914 		}
1915 	}
1916 
1917 	if (banks2_fw)
1918 		snprintf(fw_name, sizeof(fw_name), "radeon/banks_k_2_smc.bin");
1919 	else if (new_smc)
1920 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1921 	else
1922 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1923 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1924 	if (err) {
1925 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1926 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1927 		if (err) {
1928 			pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1929 			release_firmware(rdev->smc_fw);
1930 			rdev->smc_fw = NULL;
1931 			err = 0;
1932 		} else if (rdev->smc_fw->size != smc_req_size) {
1933 			pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1934 			       rdev->smc_fw->size, fw_name);
1935 			err = -EINVAL;
1936 		}
1937 	} else {
1938 		err = radeon_ucode_validate(rdev->smc_fw);
1939 		if (err) {
1940 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1941 			       fw_name);
1942 			goto out;
1943 		} else {
1944 			new_fw++;
1945 		}
1946 	}
1947 
1948 	if (new_fw == 0) {
1949 		rdev->new_fw = false;
1950 	} else if (new_fw < 6) {
1951 		pr_err("si_fw: mixing new and old firmware!\n");
1952 		err = -EINVAL;
1953 	} else {
1954 		rdev->new_fw = true;
1955 	}
1956 out:
1957 	if (err) {
1958 		if (err != -EINVAL)
1959 			pr_err("si_cp: Failed to load firmware \"%s\"\n",
1960 			       fw_name);
1961 		release_firmware(rdev->pfp_fw);
1962 		rdev->pfp_fw = NULL;
1963 		release_firmware(rdev->me_fw);
1964 		rdev->me_fw = NULL;
1965 		release_firmware(rdev->ce_fw);
1966 		rdev->ce_fw = NULL;
1967 		release_firmware(rdev->rlc_fw);
1968 		rdev->rlc_fw = NULL;
1969 		release_firmware(rdev->mc_fw);
1970 		rdev->mc_fw = NULL;
1971 		release_firmware(rdev->smc_fw);
1972 		rdev->smc_fw = NULL;
1973 	}
1974 	return err;
1975 }
1976 
1977 /* watermark setup */
dce6_line_buffer_adjust(struct radeon_device * rdev,struct radeon_crtc * radeon_crtc,struct drm_display_mode * mode,struct drm_display_mode * other_mode)1978 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1979 				   struct radeon_crtc *radeon_crtc,
1980 				   struct drm_display_mode *mode,
1981 				   struct drm_display_mode *other_mode)
1982 {
1983 	u32 tmp, buffer_alloc, i;
1984 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1985 	/*
1986 	 * Line Buffer Setup
1987 	 * There are 3 line buffers, each one shared by 2 display controllers.
1988 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1989 	 * the display controllers.  The paritioning is done via one of four
1990 	 * preset allocations specified in bits 21:20:
1991 	 *  0 - half lb
1992 	 *  2 - whole lb, other crtc must be disabled
1993 	 */
1994 	/* this can get tricky if we have two large displays on a paired group
1995 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1996 	 * non-linked crtcs for maximum line buffer allocation.
1997 	 */
1998 	if (radeon_crtc->base.enabled && mode) {
1999 		if (other_mode) {
2000 			tmp = 0; /* 1/2 */
2001 			buffer_alloc = 1;
2002 		} else {
2003 			tmp = 2; /* whole */
2004 			buffer_alloc = 2;
2005 		}
2006 	} else {
2007 		tmp = 0;
2008 		buffer_alloc = 0;
2009 	}
2010 
2011 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
2012 	       DC_LB_MEMORY_CONFIG(tmp));
2013 
2014 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
2015 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
2016 	for (i = 0; i < rdev->usec_timeout; i++) {
2017 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
2018 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
2019 			break;
2020 		udelay(1);
2021 	}
2022 
2023 	if (radeon_crtc->base.enabled && mode) {
2024 		switch (tmp) {
2025 		case 0:
2026 		default:
2027 			return 4096 * 2;
2028 		case 2:
2029 			return 8192 * 2;
2030 		}
2031 	}
2032 
2033 	/* controller not enabled, so no lb used */
2034 	return 0;
2035 }
2036 
si_get_number_of_dram_channels(struct radeon_device * rdev)2037 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2038 {
2039 	u32 tmp = RREG32(MC_SHARED_CHMAP);
2040 
2041 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2042 	case 0:
2043 	default:
2044 		return 1;
2045 	case 1:
2046 		return 2;
2047 	case 2:
2048 		return 4;
2049 	case 3:
2050 		return 8;
2051 	case 4:
2052 		return 3;
2053 	case 5:
2054 		return 6;
2055 	case 6:
2056 		return 10;
2057 	case 7:
2058 		return 12;
2059 	case 8:
2060 		return 16;
2061 	}
2062 }
2063 
2064 struct dce6_wm_params {
2065 	u32 dram_channels; /* number of dram channels */
2066 	u32 yclk;          /* bandwidth per dram data pin in kHz */
2067 	u32 sclk;          /* engine clock in kHz */
2068 	u32 disp_clk;      /* display clock in kHz */
2069 	u32 src_width;     /* viewport width */
2070 	u32 active_time;   /* active display time in ns */
2071 	u32 blank_time;    /* blank time in ns */
2072 	bool interlaced;    /* mode is interlaced */
2073 	fixed20_12 vsc;    /* vertical scale ratio */
2074 	u32 num_heads;     /* number of active crtcs */
2075 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2076 	u32 lb_size;       /* line buffer allocated to pipe */
2077 	u32 vtaps;         /* vertical scaler taps */
2078 };
2079 
dce6_dram_bandwidth(struct dce6_wm_params * wm)2080 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2081 {
2082 	/* Calculate raw DRAM Bandwidth */
2083 	fixed20_12 dram_efficiency; /* 0.7 */
2084 	fixed20_12 yclk, dram_channels, bandwidth;
2085 	fixed20_12 a;
2086 
2087 	a.full = dfixed_const(1000);
2088 	yclk.full = dfixed_const(wm->yclk);
2089 	yclk.full = dfixed_div(yclk, a);
2090 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2091 	a.full = dfixed_const(10);
2092 	dram_efficiency.full = dfixed_const(7);
2093 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2094 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2095 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2096 
2097 	return dfixed_trunc(bandwidth);
2098 }
2099 
dce6_dram_bandwidth_for_display(struct dce6_wm_params * wm)2100 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2101 {
2102 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2103 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2104 	fixed20_12 yclk, dram_channels, bandwidth;
2105 	fixed20_12 a;
2106 
2107 	a.full = dfixed_const(1000);
2108 	yclk.full = dfixed_const(wm->yclk);
2109 	yclk.full = dfixed_div(yclk, a);
2110 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2111 	a.full = dfixed_const(10);
2112 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2113 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2114 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2115 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2116 
2117 	return dfixed_trunc(bandwidth);
2118 }
2119 
dce6_data_return_bandwidth(struct dce6_wm_params * wm)2120 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2121 {
2122 	/* Calculate the display Data return Bandwidth */
2123 	fixed20_12 return_efficiency; /* 0.8 */
2124 	fixed20_12 sclk, bandwidth;
2125 	fixed20_12 a;
2126 
2127 	a.full = dfixed_const(1000);
2128 	sclk.full = dfixed_const(wm->sclk);
2129 	sclk.full = dfixed_div(sclk, a);
2130 	a.full = dfixed_const(10);
2131 	return_efficiency.full = dfixed_const(8);
2132 	return_efficiency.full = dfixed_div(return_efficiency, a);
2133 	a.full = dfixed_const(32);
2134 	bandwidth.full = dfixed_mul(a, sclk);
2135 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2136 
2137 	return dfixed_trunc(bandwidth);
2138 }
2139 
dce6_get_dmif_bytes_per_request(struct dce6_wm_params * wm)2140 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2141 {
2142 	return 32;
2143 }
2144 
dce6_dmif_request_bandwidth(struct dce6_wm_params * wm)2145 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2146 {
2147 	/* Calculate the DMIF Request Bandwidth */
2148 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2149 	fixed20_12 disp_clk, sclk, bandwidth;
2150 	fixed20_12 a, b1, b2;
2151 	u32 min_bandwidth;
2152 
2153 	a.full = dfixed_const(1000);
2154 	disp_clk.full = dfixed_const(wm->disp_clk);
2155 	disp_clk.full = dfixed_div(disp_clk, a);
2156 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2157 	b1.full = dfixed_mul(a, disp_clk);
2158 
2159 	a.full = dfixed_const(1000);
2160 	sclk.full = dfixed_const(wm->sclk);
2161 	sclk.full = dfixed_div(sclk, a);
2162 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2163 	b2.full = dfixed_mul(a, sclk);
2164 
2165 	a.full = dfixed_const(10);
2166 	disp_clk_request_efficiency.full = dfixed_const(8);
2167 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2168 
2169 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2170 
2171 	a.full = dfixed_const(min_bandwidth);
2172 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2173 
2174 	return dfixed_trunc(bandwidth);
2175 }
2176 
dce6_available_bandwidth(struct dce6_wm_params * wm)2177 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2178 {
2179 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2180 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2181 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2182 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2183 
2184 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2185 }
2186 
dce6_average_bandwidth(struct dce6_wm_params * wm)2187 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2188 {
2189 	/* Calculate the display mode Average Bandwidth
2190 	 * DisplayMode should contain the source and destination dimensions,
2191 	 * timing, etc.
2192 	 */
2193 	fixed20_12 bpp;
2194 	fixed20_12 line_time;
2195 	fixed20_12 src_width;
2196 	fixed20_12 bandwidth;
2197 	fixed20_12 a;
2198 
2199 	a.full = dfixed_const(1000);
2200 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2201 	line_time.full = dfixed_div(line_time, a);
2202 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2203 	src_width.full = dfixed_const(wm->src_width);
2204 	bandwidth.full = dfixed_mul(src_width, bpp);
2205 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2206 	bandwidth.full = dfixed_div(bandwidth, line_time);
2207 
2208 	return dfixed_trunc(bandwidth);
2209 }
2210 
dce6_latency_watermark(struct dce6_wm_params * wm)2211 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2212 {
2213 	/* First calcualte the latency in ns */
2214 	u32 mc_latency = 2000; /* 2000 ns. */
2215 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2216 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2217 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2218 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2219 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2220 		(wm->num_heads * cursor_line_pair_return_time);
2221 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2222 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2223 	u32 tmp, dmif_size = 12288;
2224 	fixed20_12 a, b, c;
2225 
2226 	if (wm->num_heads == 0)
2227 		return 0;
2228 
2229 	a.full = dfixed_const(2);
2230 	b.full = dfixed_const(1);
2231 	if ((wm->vsc.full > a.full) ||
2232 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2233 	    (wm->vtaps >= 5) ||
2234 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2235 		max_src_lines_per_dst_line = 4;
2236 	else
2237 		max_src_lines_per_dst_line = 2;
2238 
2239 	a.full = dfixed_const(available_bandwidth);
2240 	b.full = dfixed_const(wm->num_heads);
2241 	a.full = dfixed_div(a, b);
2242 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
2243 	tmp = min(dfixed_trunc(a), tmp);
2244 
2245 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
2246 
2247 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2248 	b.full = dfixed_const(1000);
2249 	c.full = dfixed_const(lb_fill_bw);
2250 	b.full = dfixed_div(c, b);
2251 	a.full = dfixed_div(a, b);
2252 	line_fill_time = dfixed_trunc(a);
2253 
2254 	if (line_fill_time < wm->active_time)
2255 		return latency;
2256 	else
2257 		return latency + (line_fill_time - wm->active_time);
2258 
2259 }
2260 
dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params * wm)2261 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2262 {
2263 	if (dce6_average_bandwidth(wm) <=
2264 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2265 		return true;
2266 	else
2267 		return false;
2268 };
2269 
dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params * wm)2270 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2271 {
2272 	if (dce6_average_bandwidth(wm) <=
2273 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2274 		return true;
2275 	else
2276 		return false;
2277 };
2278 
dce6_check_latency_hiding(struct dce6_wm_params * wm)2279 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2280 {
2281 	u32 lb_partitions = wm->lb_size / wm->src_width;
2282 	u32 line_time = wm->active_time + wm->blank_time;
2283 	u32 latency_tolerant_lines;
2284 	u32 latency_hiding;
2285 	fixed20_12 a;
2286 
2287 	a.full = dfixed_const(1);
2288 	if (wm->vsc.full > a.full)
2289 		latency_tolerant_lines = 1;
2290 	else {
2291 		if (lb_partitions <= (wm->vtaps + 1))
2292 			latency_tolerant_lines = 1;
2293 		else
2294 			latency_tolerant_lines = 2;
2295 	}
2296 
2297 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2298 
2299 	if (dce6_latency_watermark(wm) <= latency_hiding)
2300 		return true;
2301 	else
2302 		return false;
2303 }
2304 
dce6_program_watermarks(struct radeon_device * rdev,struct radeon_crtc * radeon_crtc,u32 lb_size,u32 num_heads)2305 static void dce6_program_watermarks(struct radeon_device *rdev,
2306 					 struct radeon_crtc *radeon_crtc,
2307 					 u32 lb_size, u32 num_heads)
2308 {
2309 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2310 	struct dce6_wm_params wm_low, wm_high;
2311 	u32 dram_channels;
2312 	u32 active_time;
2313 	u32 line_time = 0;
2314 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2315 	u32 priority_a_mark = 0, priority_b_mark = 0;
2316 	u32 priority_a_cnt = PRIORITY_OFF;
2317 	u32 priority_b_cnt = PRIORITY_OFF;
2318 	u32 tmp, arb_control3;
2319 	fixed20_12 a, b, c;
2320 
2321 	if (radeon_crtc->base.enabled && num_heads && mode) {
2322 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
2323 					    (u32)mode->clock);
2324 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
2325 					  (u32)mode->clock);
2326 		line_time = min(line_time, (u32)65535);
2327 		priority_a_cnt = 0;
2328 		priority_b_cnt = 0;
2329 
2330 		if (rdev->family == CHIP_ARUBA)
2331 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2332 		else
2333 			dram_channels = si_get_number_of_dram_channels(rdev);
2334 
2335 		/* watermark for high clocks */
2336 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2337 			wm_high.yclk =
2338 				radeon_dpm_get_mclk(rdev, false) * 10;
2339 			wm_high.sclk =
2340 				radeon_dpm_get_sclk(rdev, false) * 10;
2341 		} else {
2342 			wm_high.yclk = rdev->pm.current_mclk * 10;
2343 			wm_high.sclk = rdev->pm.current_sclk * 10;
2344 		}
2345 
2346 		wm_high.disp_clk = mode->clock;
2347 		wm_high.src_width = mode->crtc_hdisplay;
2348 		wm_high.active_time = active_time;
2349 		wm_high.blank_time = line_time - wm_high.active_time;
2350 		wm_high.interlaced = false;
2351 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2352 			wm_high.interlaced = true;
2353 		wm_high.vsc = radeon_crtc->vsc;
2354 		wm_high.vtaps = 1;
2355 		if (radeon_crtc->rmx_type != RMX_OFF)
2356 			wm_high.vtaps = 2;
2357 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2358 		wm_high.lb_size = lb_size;
2359 		wm_high.dram_channels = dram_channels;
2360 		wm_high.num_heads = num_heads;
2361 
2362 		/* watermark for low clocks */
2363 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2364 			wm_low.yclk =
2365 				radeon_dpm_get_mclk(rdev, true) * 10;
2366 			wm_low.sclk =
2367 				radeon_dpm_get_sclk(rdev, true) * 10;
2368 		} else {
2369 			wm_low.yclk = rdev->pm.current_mclk * 10;
2370 			wm_low.sclk = rdev->pm.current_sclk * 10;
2371 		}
2372 
2373 		wm_low.disp_clk = mode->clock;
2374 		wm_low.src_width = mode->crtc_hdisplay;
2375 		wm_low.active_time = active_time;
2376 		wm_low.blank_time = line_time - wm_low.active_time;
2377 		wm_low.interlaced = false;
2378 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2379 			wm_low.interlaced = true;
2380 		wm_low.vsc = radeon_crtc->vsc;
2381 		wm_low.vtaps = 1;
2382 		if (radeon_crtc->rmx_type != RMX_OFF)
2383 			wm_low.vtaps = 2;
2384 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2385 		wm_low.lb_size = lb_size;
2386 		wm_low.dram_channels = dram_channels;
2387 		wm_low.num_heads = num_heads;
2388 
2389 		/* set for high clocks */
2390 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2391 		/* set for low clocks */
2392 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2393 
2394 		/* possibly force display priority to high */
2395 		/* should really do this at mode validation time... */
2396 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2397 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2398 		    !dce6_check_latency_hiding(&wm_high) ||
2399 		    (rdev->disp_priority == 2)) {
2400 			DRM_DEBUG_KMS("force priority to high\n");
2401 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2402 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2403 		}
2404 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2405 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2406 		    !dce6_check_latency_hiding(&wm_low) ||
2407 		    (rdev->disp_priority == 2)) {
2408 			DRM_DEBUG_KMS("force priority to high\n");
2409 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2410 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2411 		}
2412 
2413 		a.full = dfixed_const(1000);
2414 		b.full = dfixed_const(mode->clock);
2415 		b.full = dfixed_div(b, a);
2416 		c.full = dfixed_const(latency_watermark_a);
2417 		c.full = dfixed_mul(c, b);
2418 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2419 		c.full = dfixed_div(c, a);
2420 		a.full = dfixed_const(16);
2421 		c.full = dfixed_div(c, a);
2422 		priority_a_mark = dfixed_trunc(c);
2423 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2424 
2425 		a.full = dfixed_const(1000);
2426 		b.full = dfixed_const(mode->clock);
2427 		b.full = dfixed_div(b, a);
2428 		c.full = dfixed_const(latency_watermark_b);
2429 		c.full = dfixed_mul(c, b);
2430 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2431 		c.full = dfixed_div(c, a);
2432 		a.full = dfixed_const(16);
2433 		c.full = dfixed_div(c, a);
2434 		priority_b_mark = dfixed_trunc(c);
2435 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2436 
2437 		/* Save number of lines the linebuffer leads before the scanout */
2438 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2439 	}
2440 
2441 	/* select wm A */
2442 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2443 	tmp = arb_control3;
2444 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2445 	tmp |= LATENCY_WATERMARK_MASK(1);
2446 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2447 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2448 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2449 		LATENCY_HIGH_WATERMARK(line_time)));
2450 	/* select wm B */
2451 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2452 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2453 	tmp |= LATENCY_WATERMARK_MASK(2);
2454 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2455 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2456 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2457 		LATENCY_HIGH_WATERMARK(line_time)));
2458 	/* restore original selection */
2459 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2460 
2461 	/* write the priority marks */
2462 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2463 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2464 
2465 	/* save values for DPM */
2466 	radeon_crtc->line_time = line_time;
2467 	radeon_crtc->wm_high = latency_watermark_a;
2468 	radeon_crtc->wm_low = latency_watermark_b;
2469 }
2470 
dce6_bandwidth_update(struct radeon_device * rdev)2471 void dce6_bandwidth_update(struct radeon_device *rdev)
2472 {
2473 	struct drm_display_mode *mode0 = NULL;
2474 	struct drm_display_mode *mode1 = NULL;
2475 	u32 num_heads = 0, lb_size;
2476 	int i;
2477 
2478 	if (!rdev->mode_info.mode_config_initialized)
2479 		return;
2480 
2481 	radeon_update_display_priority(rdev);
2482 
2483 	for (i = 0; i < rdev->num_crtc; i++) {
2484 		if (rdev->mode_info.crtcs[i]->base.enabled)
2485 			num_heads++;
2486 	}
2487 	for (i = 0; i < rdev->num_crtc; i += 2) {
2488 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2489 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2490 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2491 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2492 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2493 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2494 	}
2495 }
2496 
2497 /*
2498  * Core functions
2499  */
si_tiling_mode_table_init(struct radeon_device * rdev)2500 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2501 {
2502 	u32 *tile = rdev->config.si.tile_mode_array;
2503 	const u32 num_tile_mode_states =
2504 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2505 	u32 reg_offset, split_equal_to_row_size;
2506 
2507 	switch (rdev->config.si.mem_row_size_in_kb) {
2508 	case 1:
2509 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2510 		break;
2511 	case 2:
2512 	default:
2513 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2514 		break;
2515 	case 4:
2516 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2517 		break;
2518 	}
2519 
2520 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2521 		tile[reg_offset] = 0;
2522 
2523 	switch(rdev->family) {
2524 	case CHIP_TAHITI:
2525 	case CHIP_PITCAIRN:
2526 		/* non-AA compressed depth or any compressed stencil */
2527 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2528 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2529 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2530 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2531 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2532 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2534 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2535 		/* 2xAA/4xAA compressed depth only */
2536 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2538 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2539 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2540 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2541 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2542 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2543 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2544 		/* 8xAA compressed depth only */
2545 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2547 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2548 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2549 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2550 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2552 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2553 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2554 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2555 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2556 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2557 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2558 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2559 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2561 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2562 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2563 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2564 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2565 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2566 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2567 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2568 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2570 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2571 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2572 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2573 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2574 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2575 			   TILE_SPLIT(split_equal_to_row_size) |
2576 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2577 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2579 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2580 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2581 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2582 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2583 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2584 			   TILE_SPLIT(split_equal_to_row_size) |
2585 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2586 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2587 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2588 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2589 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2590 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2592 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2593 			   TILE_SPLIT(split_equal_to_row_size) |
2594 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2595 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2597 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2598 		/* 1D and 1D Array Surfaces */
2599 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2600 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2601 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2602 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2603 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2604 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2606 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2607 		/* Displayable maps. */
2608 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2609 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2610 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2611 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2612 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2613 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2614 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2615 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2616 		/* Display 8bpp. */
2617 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2619 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2620 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2621 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2622 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2624 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2625 		/* Display 16bpp. */
2626 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2627 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2628 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2629 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2630 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2631 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2633 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2634 		/* Display 32bpp. */
2635 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2636 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2637 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2638 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2639 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2640 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2643 		/* Thin. */
2644 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2645 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2646 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2647 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2648 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2649 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2651 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2652 		/* Thin 8 bpp. */
2653 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2655 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2656 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2657 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2658 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2660 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2661 		/* Thin 16 bpp. */
2662 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2663 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2664 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2665 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2666 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2667 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2669 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2670 		/* Thin 32 bpp. */
2671 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2673 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2674 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2675 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2676 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2678 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2679 		/* Thin 64 bpp. */
2680 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2682 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2683 			   TILE_SPLIT(split_equal_to_row_size) |
2684 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2685 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2686 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2687 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2688 		/* 8 bpp PRT. */
2689 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2691 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2692 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2693 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2694 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2695 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2696 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2697 		/* 16 bpp PRT */
2698 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2700 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2701 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2702 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2703 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2705 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2706 		/* 32 bpp PRT */
2707 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2709 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2710 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2711 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2712 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2713 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2714 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2715 		/* 64 bpp PRT */
2716 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2717 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2718 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2719 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2720 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2721 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2723 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2724 		/* 128 bpp PRT */
2725 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2726 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2727 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2728 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2729 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2730 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2732 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2733 
2734 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2735 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2736 		break;
2737 
2738 	case CHIP_VERDE:
2739 	case CHIP_OLAND:
2740 	case CHIP_HAINAN:
2741 		/* non-AA compressed depth or any compressed stencil */
2742 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2743 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2744 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2745 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2746 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2747 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2748 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2749 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2750 		/* 2xAA/4xAA compressed depth only */
2751 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2752 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2753 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2755 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2756 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2758 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2759 		/* 8xAA compressed depth only */
2760 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2762 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2764 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2765 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2766 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2767 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2768 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2769 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2770 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2771 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2773 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2774 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2776 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2777 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2778 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2779 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2780 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2781 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2782 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2783 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2784 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2785 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2786 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2787 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2788 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2789 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2790 			   TILE_SPLIT(split_equal_to_row_size) |
2791 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2792 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2794 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2795 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2796 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2798 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799 			   TILE_SPLIT(split_equal_to_row_size) |
2800 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2801 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2802 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2803 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2804 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2805 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2806 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2807 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2808 			   TILE_SPLIT(split_equal_to_row_size) |
2809 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2810 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2811 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2812 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2813 		/* 1D and 1D Array Surfaces */
2814 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2815 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2816 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2817 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2818 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2819 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2820 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2821 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2822 		/* Displayable maps. */
2823 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2824 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2825 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2826 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2827 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2828 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2830 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2831 		/* Display 8bpp. */
2832 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2833 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2834 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2835 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2836 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2837 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2839 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2840 		/* Display 16bpp. */
2841 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2842 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2843 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2844 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2845 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2846 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2847 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2848 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2849 		/* Display 32bpp. */
2850 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2851 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2852 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2853 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2854 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2855 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2856 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2857 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2858 		/* Thin. */
2859 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2860 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2861 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2862 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2863 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2864 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2866 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2867 		/* Thin 8 bpp. */
2868 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2869 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2870 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2871 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2872 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2873 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2874 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2875 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2876 		/* Thin 16 bpp. */
2877 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2879 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2880 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2881 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2882 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2883 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2884 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2885 		/* Thin 32 bpp. */
2886 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2887 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2888 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2889 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2890 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2891 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2892 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2893 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2894 		/* Thin 64 bpp. */
2895 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2897 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2898 			   TILE_SPLIT(split_equal_to_row_size) |
2899 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2900 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2901 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2902 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2903 		/* 8 bpp PRT. */
2904 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2906 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2907 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2908 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2909 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2910 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2911 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2912 		/* 16 bpp PRT */
2913 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2915 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2916 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2917 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2918 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2919 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2920 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2921 		/* 32 bpp PRT */
2922 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2923 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2924 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2925 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2926 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2927 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2928 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2929 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2930 		/* 64 bpp PRT */
2931 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2932 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2933 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2934 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2935 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2936 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2937 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2938 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2939 		/* 128 bpp PRT */
2940 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2941 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2942 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2943 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2944 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2945 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2946 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2947 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2948 
2949 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2950 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2951 		break;
2952 
2953 	default:
2954 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2955 	}
2956 }
2957 
si_select_se_sh(struct radeon_device * rdev,u32 se_num,u32 sh_num)2958 static void si_select_se_sh(struct radeon_device *rdev,
2959 			    u32 se_num, u32 sh_num)
2960 {
2961 	u32 data = INSTANCE_BROADCAST_WRITES;
2962 
2963 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2964 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2965 	else if (se_num == 0xffffffff)
2966 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2967 	else if (sh_num == 0xffffffff)
2968 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2969 	else
2970 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2971 	WREG32(GRBM_GFX_INDEX, data);
2972 }
2973 
si_create_bitmask(u32 bit_width)2974 static u32 si_create_bitmask(u32 bit_width)
2975 {
2976 	u32 i, mask = 0;
2977 
2978 	for (i = 0; i < bit_width; i++) {
2979 		mask <<= 1;
2980 		mask |= 1;
2981 	}
2982 	return mask;
2983 }
2984 
si_get_cu_enabled(struct radeon_device * rdev,u32 cu_per_sh)2985 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2986 {
2987 	u32 data, mask;
2988 
2989 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2990 	if (data & 1)
2991 		data &= INACTIVE_CUS_MASK;
2992 	else
2993 		data = 0;
2994 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2995 
2996 	data >>= INACTIVE_CUS_SHIFT;
2997 
2998 	mask = si_create_bitmask(cu_per_sh);
2999 
3000 	return ~data & mask;
3001 }
3002 
si_setup_spi(struct radeon_device * rdev,u32 se_num,u32 sh_per_se,u32 cu_per_sh)3003 static void si_setup_spi(struct radeon_device *rdev,
3004 			 u32 se_num, u32 sh_per_se,
3005 			 u32 cu_per_sh)
3006 {
3007 	int i, j, k;
3008 	u32 data, mask, active_cu;
3009 
3010 	for (i = 0; i < se_num; i++) {
3011 		for (j = 0; j < sh_per_se; j++) {
3012 			si_select_se_sh(rdev, i, j);
3013 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
3014 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
3015 
3016 			mask = 1;
3017 			for (k = 0; k < 16; k++) {
3018 				mask <<= k;
3019 				if (active_cu & mask) {
3020 					data &= ~mask;
3021 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3022 					break;
3023 				}
3024 			}
3025 		}
3026 	}
3027 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3028 }
3029 
si_get_rb_disabled(struct radeon_device * rdev,u32 max_rb_num_per_se,u32 sh_per_se)3030 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3031 			      u32 max_rb_num_per_se,
3032 			      u32 sh_per_se)
3033 {
3034 	u32 data, mask;
3035 
3036 	data = RREG32(CC_RB_BACKEND_DISABLE);
3037 	if (data & 1)
3038 		data &= BACKEND_DISABLE_MASK;
3039 	else
3040 		data = 0;
3041 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3042 
3043 	data >>= BACKEND_DISABLE_SHIFT;
3044 
3045 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3046 
3047 	return data & mask;
3048 }
3049 
si_setup_rb(struct radeon_device * rdev,u32 se_num,u32 sh_per_se,u32 max_rb_num_per_se)3050 static void si_setup_rb(struct radeon_device *rdev,
3051 			u32 se_num, u32 sh_per_se,
3052 			u32 max_rb_num_per_se)
3053 {
3054 	int i, j;
3055 	u32 data, mask;
3056 	u32 disabled_rbs = 0;
3057 	u32 enabled_rbs = 0;
3058 
3059 	for (i = 0; i < se_num; i++) {
3060 		for (j = 0; j < sh_per_se; j++) {
3061 			si_select_se_sh(rdev, i, j);
3062 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3063 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3064 		}
3065 	}
3066 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3067 
3068 	mask = 1;
3069 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3070 		if (!(disabled_rbs & mask))
3071 			enabled_rbs |= mask;
3072 		mask <<= 1;
3073 	}
3074 
3075 	rdev->config.si.backend_enable_mask = enabled_rbs;
3076 
3077 	for (i = 0; i < se_num; i++) {
3078 		si_select_se_sh(rdev, i, 0xffffffff);
3079 		data = 0;
3080 		for (j = 0; j < sh_per_se; j++) {
3081 			switch (enabled_rbs & 3) {
3082 			case 1:
3083 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3084 				break;
3085 			case 2:
3086 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3087 				break;
3088 			case 3:
3089 			default:
3090 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3091 				break;
3092 			}
3093 			enabled_rbs >>= 2;
3094 		}
3095 		WREG32(PA_SC_RASTER_CONFIG, data);
3096 	}
3097 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3098 }
3099 
si_gpu_init(struct radeon_device * rdev)3100 static void si_gpu_init(struct radeon_device *rdev)
3101 {
3102 	u32 gb_addr_config = 0;
3103 	u32 mc_shared_chmap __unused, mc_arb_ramcfg;
3104 	u32 sx_debug_1;
3105 	u32 hdp_host_path_cntl;
3106 	u32 tmp;
3107 	int i, j;
3108 
3109 	switch (rdev->family) {
3110 	case CHIP_TAHITI:
3111 		rdev->config.si.max_shader_engines = 2;
3112 		rdev->config.si.max_tile_pipes = 12;
3113 		rdev->config.si.max_cu_per_sh = 8;
3114 		rdev->config.si.max_sh_per_se = 2;
3115 		rdev->config.si.max_backends_per_se = 4;
3116 		rdev->config.si.max_texture_channel_caches = 12;
3117 		rdev->config.si.max_gprs = 256;
3118 		rdev->config.si.max_gs_threads = 32;
3119 		rdev->config.si.max_hw_contexts = 8;
3120 
3121 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3122 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3123 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3124 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3125 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3126 		break;
3127 	case CHIP_PITCAIRN:
3128 		rdev->config.si.max_shader_engines = 2;
3129 		rdev->config.si.max_tile_pipes = 8;
3130 		rdev->config.si.max_cu_per_sh = 5;
3131 		rdev->config.si.max_sh_per_se = 2;
3132 		rdev->config.si.max_backends_per_se = 4;
3133 		rdev->config.si.max_texture_channel_caches = 8;
3134 		rdev->config.si.max_gprs = 256;
3135 		rdev->config.si.max_gs_threads = 32;
3136 		rdev->config.si.max_hw_contexts = 8;
3137 
3138 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3139 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3140 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3141 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3142 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3143 		break;
3144 	case CHIP_VERDE:
3145 	default:
3146 		rdev->config.si.max_shader_engines = 1;
3147 		rdev->config.si.max_tile_pipes = 4;
3148 		rdev->config.si.max_cu_per_sh = 5;
3149 		rdev->config.si.max_sh_per_se = 2;
3150 		rdev->config.si.max_backends_per_se = 4;
3151 		rdev->config.si.max_texture_channel_caches = 4;
3152 		rdev->config.si.max_gprs = 256;
3153 		rdev->config.si.max_gs_threads = 32;
3154 		rdev->config.si.max_hw_contexts = 8;
3155 
3156 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3157 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3158 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3159 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3160 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3161 		break;
3162 	case CHIP_OLAND:
3163 		rdev->config.si.max_shader_engines = 1;
3164 		rdev->config.si.max_tile_pipes = 4;
3165 		rdev->config.si.max_cu_per_sh = 6;
3166 		rdev->config.si.max_sh_per_se = 1;
3167 		rdev->config.si.max_backends_per_se = 2;
3168 		rdev->config.si.max_texture_channel_caches = 4;
3169 		rdev->config.si.max_gprs = 256;
3170 		rdev->config.si.max_gs_threads = 16;
3171 		rdev->config.si.max_hw_contexts = 8;
3172 
3173 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3174 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3175 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3176 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3177 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3178 		break;
3179 	case CHIP_HAINAN:
3180 		rdev->config.si.max_shader_engines = 1;
3181 		rdev->config.si.max_tile_pipes = 4;
3182 		rdev->config.si.max_cu_per_sh = 5;
3183 		rdev->config.si.max_sh_per_se = 1;
3184 		rdev->config.si.max_backends_per_se = 1;
3185 		rdev->config.si.max_texture_channel_caches = 2;
3186 		rdev->config.si.max_gprs = 256;
3187 		rdev->config.si.max_gs_threads = 16;
3188 		rdev->config.si.max_hw_contexts = 8;
3189 
3190 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3191 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3192 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3193 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3194 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3195 		break;
3196 	}
3197 
3198 	/* Initialize HDP */
3199 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3200 		WREG32((0x2c14 + j), 0x00000000);
3201 		WREG32((0x2c18 + j), 0x00000000);
3202 		WREG32((0x2c1c + j), 0x00000000);
3203 		WREG32((0x2c20 + j), 0x00000000);
3204 		WREG32((0x2c24 + j), 0x00000000);
3205 	}
3206 
3207 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3208 	WREG32(SRBM_INT_CNTL, 1);
3209 	WREG32(SRBM_INT_ACK, 1);
3210 
3211 	evergreen_fix_pci_max_read_req_size(rdev);
3212 
3213 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3214 
3215 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3216 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3217 
3218 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3219 	rdev->config.si.mem_max_burst_length_bytes = 256;
3220 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3221 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3222 	if (rdev->config.si.mem_row_size_in_kb > 4)
3223 		rdev->config.si.mem_row_size_in_kb = 4;
3224 	/* XXX use MC settings? */
3225 	rdev->config.si.shader_engine_tile_size = 32;
3226 	rdev->config.si.num_gpus = 1;
3227 	rdev->config.si.multi_gpu_tile_size = 64;
3228 
3229 	/* fix up row size */
3230 	gb_addr_config &= ~ROW_SIZE_MASK;
3231 	switch (rdev->config.si.mem_row_size_in_kb) {
3232 	case 1:
3233 	default:
3234 		gb_addr_config |= ROW_SIZE(0);
3235 		break;
3236 	case 2:
3237 		gb_addr_config |= ROW_SIZE(1);
3238 		break;
3239 	case 4:
3240 		gb_addr_config |= ROW_SIZE(2);
3241 		break;
3242 	}
3243 
3244 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3245 	 * not have bank info, so create a custom tiling dword.
3246 	 * bits 3:0   num_pipes
3247 	 * bits 7:4   num_banks
3248 	 * bits 11:8  group_size
3249 	 * bits 15:12 row_size
3250 	 */
3251 	rdev->config.si.tile_config = 0;
3252 	switch (rdev->config.si.num_tile_pipes) {
3253 	case 1:
3254 		rdev->config.si.tile_config |= (0 << 0);
3255 		break;
3256 	case 2:
3257 		rdev->config.si.tile_config |= (1 << 0);
3258 		break;
3259 	case 4:
3260 		rdev->config.si.tile_config |= (2 << 0);
3261 		break;
3262 	case 8:
3263 	default:
3264 		/* XXX what about 12? */
3265 		rdev->config.si.tile_config |= (3 << 0);
3266 		break;
3267 	}
3268 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3269 	case 0: /* four banks */
3270 		rdev->config.si.tile_config |= 0 << 4;
3271 		break;
3272 	case 1: /* eight banks */
3273 		rdev->config.si.tile_config |= 1 << 4;
3274 		break;
3275 	case 2: /* sixteen banks */
3276 	default:
3277 		rdev->config.si.tile_config |= 2 << 4;
3278 		break;
3279 	}
3280 	rdev->config.si.tile_config |=
3281 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3282 	rdev->config.si.tile_config |=
3283 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3284 
3285 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3286 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3287 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3288 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3289 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3290 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3291 	if (rdev->has_uvd) {
3292 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3293 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3294 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3295 	}
3296 
3297 	si_tiling_mode_table_init(rdev);
3298 
3299 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3300 		    rdev->config.si.max_sh_per_se,
3301 		    rdev->config.si.max_backends_per_se);
3302 
3303 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3304 		     rdev->config.si.max_sh_per_se,
3305 		     rdev->config.si.max_cu_per_sh);
3306 
3307 	rdev->config.si.active_cus = 0;
3308 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3309 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3310 			rdev->config.si.active_cus +=
3311 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3312 		}
3313 	}
3314 
3315 	/* set HW defaults for 3D engine */
3316 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3317 				     ROQ_IB2_START(0x2b)));
3318 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3319 
3320 	sx_debug_1 = RREG32(SX_DEBUG_1);
3321 	WREG32(SX_DEBUG_1, sx_debug_1);
3322 
3323 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3324 
3325 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3326 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3327 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3328 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3329 
3330 	WREG32(VGT_NUM_INSTANCES, 1);
3331 
3332 	WREG32(CP_PERFMON_CNTL, 0);
3333 
3334 	WREG32(SQ_CONFIG, 0);
3335 
3336 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3337 					  FORCE_EOV_MAX_REZ_CNT(255)));
3338 
3339 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3340 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3341 
3342 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3343 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3344 
3345 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3346 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3347 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3348 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3349 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3350 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3351 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3352 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3353 
3354 	tmp = RREG32(HDP_MISC_CNTL);
3355 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3356 	WREG32(HDP_MISC_CNTL, tmp);
3357 
3358 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3359 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3360 
3361 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3362 
3363 	udelay(50);
3364 }
3365 
3366 /*
3367  * GPU scratch registers helpers function.
3368  */
si_scratch_init(struct radeon_device * rdev)3369 static void si_scratch_init(struct radeon_device *rdev)
3370 {
3371 	int i;
3372 
3373 	rdev->scratch.num_reg = 7;
3374 	rdev->scratch.reg_base = SCRATCH_REG0;
3375 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3376 		rdev->scratch.free[i] = true;
3377 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3378 	}
3379 }
3380 
si_fence_ring_emit(struct radeon_device * rdev,struct radeon_fence * fence)3381 void si_fence_ring_emit(struct radeon_device *rdev,
3382 			struct radeon_fence *fence)
3383 {
3384 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3385 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3386 
3387 	/* flush read cache over gart */
3388 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3389 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3390 	radeon_ring_write(ring, 0);
3391 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3392 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3393 			  PACKET3_TC_ACTION_ENA |
3394 			  PACKET3_SH_KCACHE_ACTION_ENA |
3395 			  PACKET3_SH_ICACHE_ACTION_ENA);
3396 	radeon_ring_write(ring, 0xFFFFFFFF);
3397 	radeon_ring_write(ring, 0);
3398 	radeon_ring_write(ring, 10); /* poll interval */
3399 	/* EVENT_WRITE_EOP - flush caches, send int */
3400 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3401 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3402 	radeon_ring_write(ring, lower_32_bits(addr));
3403 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3404 	radeon_ring_write(ring, fence->seq);
3405 	radeon_ring_write(ring, 0);
3406 }
3407 
3408 /*
3409  * IB stuff
3410  */
si_ring_ib_execute(struct radeon_device * rdev,struct radeon_ib * ib)3411 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3412 {
3413 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3414 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3415 	u32 header;
3416 
3417 	if (ib->is_const_ib) {
3418 		/* set switch buffer packet before const IB */
3419 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3420 		radeon_ring_write(ring, 0);
3421 
3422 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3423 	} else {
3424 		u32 next_rptr;
3425 		if (ring->rptr_save_reg) {
3426 			next_rptr = ring->wptr + 3 + 4 + 8;
3427 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3428 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3429 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3430 			radeon_ring_write(ring, next_rptr);
3431 		} else if (rdev->wb.enabled) {
3432 			next_rptr = ring->wptr + 5 + 4 + 8;
3433 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3434 			radeon_ring_write(ring, (1 << 8));
3435 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3436 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3437 			radeon_ring_write(ring, next_rptr);
3438 		}
3439 
3440 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3441 	}
3442 
3443 	radeon_ring_write(ring, header);
3444 	radeon_ring_write(ring,
3445 #ifdef __BIG_ENDIAN
3446 			  (2 << 0) |
3447 #endif
3448 			  (ib->gpu_addr & 0xFFFFFFFC));
3449 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3450 	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3451 
3452 	if (!ib->is_const_ib) {
3453 		/* flush read cache over gart for this vmid */
3454 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3455 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3456 		radeon_ring_write(ring, vm_id);
3457 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3458 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3459 				  PACKET3_TC_ACTION_ENA |
3460 				  PACKET3_SH_KCACHE_ACTION_ENA |
3461 				  PACKET3_SH_ICACHE_ACTION_ENA);
3462 		radeon_ring_write(ring, 0xFFFFFFFF);
3463 		radeon_ring_write(ring, 0);
3464 		radeon_ring_write(ring, 10); /* poll interval */
3465 	}
3466 }
3467 
3468 /*
3469  * CP.
3470  */
si_cp_enable(struct radeon_device * rdev,bool enable)3471 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3472 {
3473 	if (enable)
3474 		WREG32(CP_ME_CNTL, 0);
3475 	else {
3476 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3477 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3478 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3479 		WREG32(SCRATCH_UMSK, 0);
3480 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3481 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3482 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3483 	}
3484 	udelay(50);
3485 }
3486 
si_cp_load_microcode(struct radeon_device * rdev)3487 static int si_cp_load_microcode(struct radeon_device *rdev)
3488 {
3489 	int i;
3490 
3491 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3492 		return -EINVAL;
3493 
3494 	si_cp_enable(rdev, false);
3495 
3496 	if (rdev->new_fw) {
3497 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3498 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3499 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3500 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3501 		const struct gfx_firmware_header_v1_0 *me_hdr =
3502 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3503 		const __le32 *fw_data;
3504 		u32 fw_size;
3505 
3506 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3507 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3508 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3509 
3510 		/* PFP */
3511 		fw_data = (const __le32 *)
3512 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3513 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3514 		WREG32(CP_PFP_UCODE_ADDR, 0);
3515 		for (i = 0; i < fw_size; i++)
3516 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3517 		WREG32(CP_PFP_UCODE_ADDR, 0);
3518 
3519 		/* CE */
3520 		fw_data = (const __le32 *)
3521 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3522 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3523 		WREG32(CP_CE_UCODE_ADDR, 0);
3524 		for (i = 0; i < fw_size; i++)
3525 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3526 		WREG32(CP_CE_UCODE_ADDR, 0);
3527 
3528 		/* ME */
3529 		fw_data = (const __be32 *)
3530 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3531 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3532 		WREG32(CP_ME_RAM_WADDR, 0);
3533 		for (i = 0; i < fw_size; i++)
3534 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3535 		WREG32(CP_ME_RAM_WADDR, 0);
3536 	} else {
3537 		const __be32 *fw_data;
3538 
3539 		/* PFP */
3540 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3541 		WREG32(CP_PFP_UCODE_ADDR, 0);
3542 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3543 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3544 		WREG32(CP_PFP_UCODE_ADDR, 0);
3545 
3546 		/* CE */
3547 		fw_data = (const __be32 *)rdev->ce_fw->data;
3548 		WREG32(CP_CE_UCODE_ADDR, 0);
3549 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3550 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3551 		WREG32(CP_CE_UCODE_ADDR, 0);
3552 
3553 		/* ME */
3554 		fw_data = (const __be32 *)rdev->me_fw->data;
3555 		WREG32(CP_ME_RAM_WADDR, 0);
3556 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3557 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3558 		WREG32(CP_ME_RAM_WADDR, 0);
3559 	}
3560 
3561 	WREG32(CP_PFP_UCODE_ADDR, 0);
3562 	WREG32(CP_CE_UCODE_ADDR, 0);
3563 	WREG32(CP_ME_RAM_WADDR, 0);
3564 	WREG32(CP_ME_RAM_RADDR, 0);
3565 	return 0;
3566 }
3567 
si_cp_start(struct radeon_device * rdev)3568 static int si_cp_start(struct radeon_device *rdev)
3569 {
3570 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3571 	int r, i;
3572 
3573 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3574 	if (r) {
3575 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3576 		return r;
3577 	}
3578 	/* init the CP */
3579 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3580 	radeon_ring_write(ring, 0x1);
3581 	radeon_ring_write(ring, 0x0);
3582 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3583 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3584 	radeon_ring_write(ring, 0);
3585 	radeon_ring_write(ring, 0);
3586 
3587 	/* init the CE partitions */
3588 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3589 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3590 	radeon_ring_write(ring, 0xc000);
3591 	radeon_ring_write(ring, 0xe000);
3592 	radeon_ring_unlock_commit(rdev, ring, false);
3593 
3594 	si_cp_enable(rdev, true);
3595 
3596 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3597 	if (r) {
3598 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3599 		return r;
3600 	}
3601 
3602 	/* setup clear context state */
3603 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3604 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3605 
3606 	for (i = 0; i < si_default_size; i++)
3607 		radeon_ring_write(ring, si_default_state[i]);
3608 
3609 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3610 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3611 
3612 	/* set clear context state */
3613 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3614 	radeon_ring_write(ring, 0);
3615 
3616 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3617 	radeon_ring_write(ring, 0x00000316);
3618 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3619 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3620 
3621 	radeon_ring_unlock_commit(rdev, ring, false);
3622 
3623 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3624 		ring = &rdev->ring[i];
3625 		r = radeon_ring_lock(rdev, ring, 2);
3626 
3627 		/* clear the compute context state */
3628 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3629 		radeon_ring_write(ring, 0);
3630 
3631 		radeon_ring_unlock_commit(rdev, ring, false);
3632 	}
3633 
3634 	return 0;
3635 }
3636 
si_cp_fini(struct radeon_device * rdev)3637 static void si_cp_fini(struct radeon_device *rdev)
3638 {
3639 	struct radeon_ring *ring;
3640 	si_cp_enable(rdev, false);
3641 
3642 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3643 	radeon_ring_fini(rdev, ring);
3644 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3645 
3646 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3647 	radeon_ring_fini(rdev, ring);
3648 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3649 
3650 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3651 	radeon_ring_fini(rdev, ring);
3652 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3653 }
3654 
si_cp_resume(struct radeon_device * rdev)3655 static int si_cp_resume(struct radeon_device *rdev)
3656 {
3657 	struct radeon_ring *ring;
3658 	u32 tmp;
3659 	u32 rb_bufsz;
3660 	int r;
3661 
3662 	si_enable_gui_idle_interrupt(rdev, false);
3663 
3664 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3665 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3666 
3667 	/* Set the write pointer delay */
3668 	WREG32(CP_RB_WPTR_DELAY, 0);
3669 
3670 	WREG32(CP_DEBUG, 0);
3671 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3672 
3673 	/* ring 0 - compute and gfx */
3674 	/* Set ring buffer size */
3675 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3676 	rb_bufsz = order_base_2(ring->ring_size / 8);
3677 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3678 #ifdef __BIG_ENDIAN
3679 	tmp |= BUF_SWAP_32BIT;
3680 #endif
3681 	WREG32(CP_RB0_CNTL, tmp);
3682 
3683 	/* Initialize the ring buffer's read and write pointers */
3684 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3685 	ring->wptr = 0;
3686 	WREG32(CP_RB0_WPTR, ring->wptr);
3687 
3688 	/* set the wb address whether it's enabled or not */
3689 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3690 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3691 
3692 	if (rdev->wb.enabled)
3693 		WREG32(SCRATCH_UMSK, 0xff);
3694 	else {
3695 		tmp |= RB_NO_UPDATE;
3696 		WREG32(SCRATCH_UMSK, 0);
3697 	}
3698 
3699 	mdelay(1);
3700 	WREG32(CP_RB0_CNTL, tmp);
3701 
3702 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3703 
3704 	/* ring1  - compute only */
3705 	/* Set ring buffer size */
3706 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3707 	rb_bufsz = order_base_2(ring->ring_size / 8);
3708 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3709 #ifdef __BIG_ENDIAN
3710 	tmp |= BUF_SWAP_32BIT;
3711 #endif
3712 	WREG32(CP_RB1_CNTL, tmp);
3713 
3714 	/* Initialize the ring buffer's read and write pointers */
3715 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3716 	ring->wptr = 0;
3717 	WREG32(CP_RB1_WPTR, ring->wptr);
3718 
3719 	/* set the wb address whether it's enabled or not */
3720 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3721 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3722 
3723 	mdelay(1);
3724 	WREG32(CP_RB1_CNTL, tmp);
3725 
3726 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3727 
3728 	/* ring2 - compute only */
3729 	/* Set ring buffer size */
3730 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3731 	rb_bufsz = order_base_2(ring->ring_size / 8);
3732 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3733 #ifdef __BIG_ENDIAN
3734 	tmp |= BUF_SWAP_32BIT;
3735 #endif
3736 	WREG32(CP_RB2_CNTL, tmp);
3737 
3738 	/* Initialize the ring buffer's read and write pointers */
3739 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3740 	ring->wptr = 0;
3741 	WREG32(CP_RB2_WPTR, ring->wptr);
3742 
3743 	/* set the wb address whether it's enabled or not */
3744 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3745 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3746 
3747 	mdelay(1);
3748 	WREG32(CP_RB2_CNTL, tmp);
3749 
3750 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3751 
3752 	/* start the rings */
3753 	si_cp_start(rdev);
3754 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3755 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3756 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3757 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3758 	if (r) {
3759 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3760 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3761 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3762 		return r;
3763 	}
3764 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3765 	if (r) {
3766 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3767 	}
3768 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3769 	if (r) {
3770 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3771 	}
3772 
3773 	si_enable_gui_idle_interrupt(rdev, true);
3774 
3775 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3776 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3777 
3778 	return 0;
3779 }
3780 
si_gpu_check_soft_reset(struct radeon_device * rdev)3781 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3782 {
3783 	u32 reset_mask = 0;
3784 	u32 tmp;
3785 
3786 	/* GRBM_STATUS */
3787 	tmp = RREG32(GRBM_STATUS);
3788 	if (tmp & (PA_BUSY | SC_BUSY |
3789 		   BCI_BUSY | SX_BUSY |
3790 		   TA_BUSY | VGT_BUSY |
3791 		   DB_BUSY | CB_BUSY |
3792 		   GDS_BUSY | SPI_BUSY |
3793 		   IA_BUSY | IA_BUSY_NO_DMA))
3794 		reset_mask |= RADEON_RESET_GFX;
3795 
3796 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3797 		   CP_BUSY | CP_COHERENCY_BUSY))
3798 		reset_mask |= RADEON_RESET_CP;
3799 
3800 	if (tmp & GRBM_EE_BUSY)
3801 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3802 
3803 	/* GRBM_STATUS2 */
3804 	tmp = RREG32(GRBM_STATUS2);
3805 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3806 		reset_mask |= RADEON_RESET_RLC;
3807 
3808 	/* DMA_STATUS_REG 0 */
3809 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3810 	if (!(tmp & DMA_IDLE))
3811 		reset_mask |= RADEON_RESET_DMA;
3812 
3813 	/* DMA_STATUS_REG 1 */
3814 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3815 	if (!(tmp & DMA_IDLE))
3816 		reset_mask |= RADEON_RESET_DMA1;
3817 
3818 	/* SRBM_STATUS2 */
3819 	tmp = RREG32(SRBM_STATUS2);
3820 	if (tmp & DMA_BUSY)
3821 		reset_mask |= RADEON_RESET_DMA;
3822 
3823 	if (tmp & DMA1_BUSY)
3824 		reset_mask |= RADEON_RESET_DMA1;
3825 
3826 	/* SRBM_STATUS */
3827 	tmp = RREG32(SRBM_STATUS);
3828 
3829 	if (tmp & IH_BUSY)
3830 		reset_mask |= RADEON_RESET_IH;
3831 
3832 	if (tmp & SEM_BUSY)
3833 		reset_mask |= RADEON_RESET_SEM;
3834 
3835 	if (tmp & GRBM_RQ_PENDING)
3836 		reset_mask |= RADEON_RESET_GRBM;
3837 
3838 	if (tmp & VMC_BUSY)
3839 		reset_mask |= RADEON_RESET_VMC;
3840 
3841 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3842 		   MCC_BUSY | MCD_BUSY))
3843 		reset_mask |= RADEON_RESET_MC;
3844 
3845 	if (evergreen_is_display_hung(rdev))
3846 		reset_mask |= RADEON_RESET_DISPLAY;
3847 
3848 	/* VM_L2_STATUS */
3849 	tmp = RREG32(VM_L2_STATUS);
3850 	if (tmp & L2_BUSY)
3851 		reset_mask |= RADEON_RESET_VMC;
3852 
3853 	/* Skip MC reset as it's mostly likely not hung, just busy */
3854 	if (reset_mask & RADEON_RESET_MC) {
3855 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3856 		reset_mask &= ~RADEON_RESET_MC;
3857 	}
3858 
3859 	return reset_mask;
3860 }
3861 
si_gpu_soft_reset(struct radeon_device * rdev,u32 reset_mask)3862 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3863 {
3864 	struct evergreen_mc_save save;
3865 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3866 	u32 tmp;
3867 
3868 	if (reset_mask == 0)
3869 		return;
3870 
3871 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3872 
3873 	evergreen_print_gpu_status_regs(rdev);
3874 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3875 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3876 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3877 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3878 
3879 	/* disable PG/CG */
3880 	si_fini_pg(rdev);
3881 	si_fini_cg(rdev);
3882 
3883 	/* stop the rlc */
3884 	si_rlc_stop(rdev);
3885 
3886 	/* Disable CP parsing/prefetching */
3887 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3888 
3889 	if (reset_mask & RADEON_RESET_DMA) {
3890 		/* dma0 */
3891 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3892 		tmp &= ~DMA_RB_ENABLE;
3893 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3894 	}
3895 	if (reset_mask & RADEON_RESET_DMA1) {
3896 		/* dma1 */
3897 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3898 		tmp &= ~DMA_RB_ENABLE;
3899 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3900 	}
3901 
3902 	udelay(50);
3903 
3904 	evergreen_mc_stop(rdev, &save);
3905 	if (evergreen_mc_wait_for_idle(rdev)) {
3906 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3907 	}
3908 
3909 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3910 		grbm_soft_reset = SOFT_RESET_CB |
3911 			SOFT_RESET_DB |
3912 			SOFT_RESET_GDS |
3913 			SOFT_RESET_PA |
3914 			SOFT_RESET_SC |
3915 			SOFT_RESET_BCI |
3916 			SOFT_RESET_SPI |
3917 			SOFT_RESET_SX |
3918 			SOFT_RESET_TC |
3919 			SOFT_RESET_TA |
3920 			SOFT_RESET_VGT |
3921 			SOFT_RESET_IA;
3922 	}
3923 
3924 	if (reset_mask & RADEON_RESET_CP) {
3925 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3926 
3927 		srbm_soft_reset |= SOFT_RESET_GRBM;
3928 	}
3929 
3930 	if (reset_mask & RADEON_RESET_DMA)
3931 		srbm_soft_reset |= SOFT_RESET_DMA;
3932 
3933 	if (reset_mask & RADEON_RESET_DMA1)
3934 		srbm_soft_reset |= SOFT_RESET_DMA1;
3935 
3936 	if (reset_mask & RADEON_RESET_DISPLAY)
3937 		srbm_soft_reset |= SOFT_RESET_DC;
3938 
3939 	if (reset_mask & RADEON_RESET_RLC)
3940 		grbm_soft_reset |= SOFT_RESET_RLC;
3941 
3942 	if (reset_mask & RADEON_RESET_SEM)
3943 		srbm_soft_reset |= SOFT_RESET_SEM;
3944 
3945 	if (reset_mask & RADEON_RESET_IH)
3946 		srbm_soft_reset |= SOFT_RESET_IH;
3947 
3948 	if (reset_mask & RADEON_RESET_GRBM)
3949 		srbm_soft_reset |= SOFT_RESET_GRBM;
3950 
3951 	if (reset_mask & RADEON_RESET_VMC)
3952 		srbm_soft_reset |= SOFT_RESET_VMC;
3953 
3954 	if (reset_mask & RADEON_RESET_MC)
3955 		srbm_soft_reset |= SOFT_RESET_MC;
3956 
3957 	if (grbm_soft_reset) {
3958 		tmp = RREG32(GRBM_SOFT_RESET);
3959 		tmp |= grbm_soft_reset;
3960 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3961 		WREG32(GRBM_SOFT_RESET, tmp);
3962 		tmp = RREG32(GRBM_SOFT_RESET);
3963 
3964 		udelay(50);
3965 
3966 		tmp &= ~grbm_soft_reset;
3967 		WREG32(GRBM_SOFT_RESET, tmp);
3968 		tmp = RREG32(GRBM_SOFT_RESET);
3969 	}
3970 
3971 	if (srbm_soft_reset) {
3972 		tmp = RREG32(SRBM_SOFT_RESET);
3973 		tmp |= srbm_soft_reset;
3974 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3975 		WREG32(SRBM_SOFT_RESET, tmp);
3976 		tmp = RREG32(SRBM_SOFT_RESET);
3977 
3978 		udelay(50);
3979 
3980 		tmp &= ~srbm_soft_reset;
3981 		WREG32(SRBM_SOFT_RESET, tmp);
3982 		tmp = RREG32(SRBM_SOFT_RESET);
3983 	}
3984 
3985 	/* Wait a little for things to settle down */
3986 	udelay(50);
3987 
3988 	evergreen_mc_resume(rdev, &save);
3989 	udelay(50);
3990 
3991 	evergreen_print_gpu_status_regs(rdev);
3992 }
3993 
si_set_clk_bypass_mode(struct radeon_device * rdev)3994 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3995 {
3996 	u32 tmp, i;
3997 
3998 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3999 	tmp |= SPLL_BYPASS_EN;
4000 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4001 
4002 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4003 	tmp |= SPLL_CTLREQ_CHG;
4004 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4005 
4006 	for (i = 0; i < rdev->usec_timeout; i++) {
4007 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
4008 			break;
4009 		udelay(1);
4010 	}
4011 
4012 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4013 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
4014 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4015 
4016 	tmp = RREG32(MPLL_CNTL_MODE);
4017 	tmp &= ~MPLL_MCLK_SEL;
4018 	WREG32(MPLL_CNTL_MODE, tmp);
4019 }
4020 
si_spll_powerdown(struct radeon_device * rdev)4021 static void si_spll_powerdown(struct radeon_device *rdev)
4022 {
4023 	u32 tmp;
4024 
4025 	tmp = RREG32(SPLL_CNTL_MODE);
4026 	tmp |= SPLL_SW_DIR_CONTROL;
4027 	WREG32(SPLL_CNTL_MODE, tmp);
4028 
4029 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4030 	tmp |= SPLL_RESET;
4031 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4032 
4033 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4034 	tmp |= SPLL_SLEEP;
4035 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4036 
4037 	tmp = RREG32(SPLL_CNTL_MODE);
4038 	tmp &= ~SPLL_SW_DIR_CONTROL;
4039 	WREG32(SPLL_CNTL_MODE, tmp);
4040 }
4041 
si_gpu_pci_config_reset(struct radeon_device * rdev)4042 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4043 {
4044 	struct evergreen_mc_save save;
4045 	u32 tmp, i;
4046 
4047 	dev_info(rdev->dev, "GPU pci config reset\n");
4048 
4049 	/* disable dpm? */
4050 
4051 	/* disable cg/pg */
4052 	si_fini_pg(rdev);
4053 	si_fini_cg(rdev);
4054 
4055 	/* Disable CP parsing/prefetching */
4056 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4057 	/* dma0 */
4058 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4059 	tmp &= ~DMA_RB_ENABLE;
4060 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4061 	/* dma1 */
4062 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4063 	tmp &= ~DMA_RB_ENABLE;
4064 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4065 	/* XXX other engines? */
4066 
4067 	/* halt the rlc, disable cp internal ints */
4068 	si_rlc_stop(rdev);
4069 
4070 	udelay(50);
4071 
4072 	/* disable mem access */
4073 	evergreen_mc_stop(rdev, &save);
4074 	if (evergreen_mc_wait_for_idle(rdev)) {
4075 		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4076 	}
4077 
4078 	/* set mclk/sclk to bypass */
4079 	si_set_clk_bypass_mode(rdev);
4080 	/* powerdown spll */
4081 	si_spll_powerdown(rdev);
4082 	/* disable BM */
4083 	pci_clear_master(rdev->pdev);
4084 	/* reset */
4085 	radeon_pci_config_reset(rdev);
4086 	/* wait for asic to come out of reset */
4087 	for (i = 0; i < rdev->usec_timeout; i++) {
4088 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4089 			break;
4090 		udelay(1);
4091 	}
4092 }
4093 
si_asic_reset(struct radeon_device * rdev,bool hard)4094 int si_asic_reset(struct radeon_device *rdev, bool hard)
4095 {
4096 	u32 reset_mask;
4097 
4098 	if (hard) {
4099 		si_gpu_pci_config_reset(rdev);
4100 		return 0;
4101 	}
4102 
4103 	reset_mask = si_gpu_check_soft_reset(rdev);
4104 
4105 	if (reset_mask)
4106 		r600_set_bios_scratch_engine_hung(rdev, true);
4107 
4108 	/* try soft reset */
4109 	si_gpu_soft_reset(rdev, reset_mask);
4110 
4111 	reset_mask = si_gpu_check_soft_reset(rdev);
4112 
4113 	/* try pci config reset */
4114 	if (reset_mask && radeon_hard_reset)
4115 		si_gpu_pci_config_reset(rdev);
4116 
4117 	reset_mask = si_gpu_check_soft_reset(rdev);
4118 
4119 	if (!reset_mask)
4120 		r600_set_bios_scratch_engine_hung(rdev, false);
4121 
4122 	return 0;
4123 }
4124 
4125 /**
4126  * si_gfx_is_lockup - Check if the GFX engine is locked up
4127  *
4128  * @rdev: radeon_device pointer
4129  * @ring: radeon_ring structure holding ring information
4130  *
4131  * Check if the GFX engine is locked up.
4132  * Returns true if the engine appears to be locked up, false if not.
4133  */
si_gfx_is_lockup(struct radeon_device * rdev,struct radeon_ring * ring)4134 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4135 {
4136 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4137 
4138 	if (!(reset_mask & (RADEON_RESET_GFX |
4139 			    RADEON_RESET_COMPUTE |
4140 			    RADEON_RESET_CP))) {
4141 		radeon_ring_lockup_update(rdev, ring);
4142 		return false;
4143 	}
4144 	return radeon_ring_test_lockup(rdev, ring);
4145 }
4146 
4147 /* MC */
si_mc_program(struct radeon_device * rdev)4148 static void si_mc_program(struct radeon_device *rdev)
4149 {
4150 	struct evergreen_mc_save save;
4151 	u32 tmp;
4152 	int i, j;
4153 
4154 	/* Initialize HDP */
4155 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4156 		WREG32((0x2c14 + j), 0x00000000);
4157 		WREG32((0x2c18 + j), 0x00000000);
4158 		WREG32((0x2c1c + j), 0x00000000);
4159 		WREG32((0x2c20 + j), 0x00000000);
4160 		WREG32((0x2c24 + j), 0x00000000);
4161 	}
4162 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4163 
4164 	evergreen_mc_stop(rdev, &save);
4165 	if (radeon_mc_wait_for_idle(rdev)) {
4166 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4167 	}
4168 	if (!ASIC_IS_NODCE(rdev))
4169 		/* Lockout access through VGA aperture*/
4170 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4171 	/* Update configuration */
4172 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4173 	       rdev->mc.vram_start >> 12);
4174 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4175 	       rdev->mc.vram_end >> 12);
4176 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4177 	       rdev->vram_scratch.gpu_addr >> 12);
4178 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4179 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4180 	WREG32(MC_VM_FB_LOCATION, tmp);
4181 	/* XXX double check these! */
4182 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4183 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4184 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4185 	WREG32(MC_VM_AGP_BASE, 0);
4186 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4187 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4188 	if (radeon_mc_wait_for_idle(rdev)) {
4189 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4190 	}
4191 	evergreen_mc_resume(rdev, &save);
4192 	if (!ASIC_IS_NODCE(rdev)) {
4193 		/* we need to own VRAM, so turn off the VGA renderer here
4194 		 * to stop it overwriting our objects */
4195 		rv515_vga_render_disable(rdev);
4196 	}
4197 }
4198 
si_vram_gtt_location(struct radeon_device * rdev,struct radeon_mc * mc)4199 void si_vram_gtt_location(struct radeon_device *rdev,
4200 			  struct radeon_mc *mc)
4201 {
4202 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4203 		/* leave room for at least 1024M GTT */
4204 		dev_warn(rdev->dev, "limiting VRAM\n");
4205 		mc->real_vram_size = 0xFFC0000000ULL;
4206 		mc->mc_vram_size = 0xFFC0000000ULL;
4207 	}
4208 	radeon_vram_location(rdev, &rdev->mc, 0);
4209 	rdev->mc.gtt_base_align = 0;
4210 	radeon_gtt_location(rdev, mc);
4211 }
4212 
si_mc_init(struct radeon_device * rdev)4213 static int si_mc_init(struct radeon_device *rdev)
4214 {
4215 	u32 tmp;
4216 	int chansize, numchan;
4217 
4218 	/* Get VRAM informations */
4219 	rdev->mc.vram_is_ddr = true;
4220 	tmp = RREG32(MC_ARB_RAMCFG);
4221 	if (tmp & CHANSIZE_OVERRIDE) {
4222 		chansize = 16;
4223 	} else if (tmp & CHANSIZE_MASK) {
4224 		chansize = 64;
4225 	} else {
4226 		chansize = 32;
4227 	}
4228 	tmp = RREG32(MC_SHARED_CHMAP);
4229 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4230 	case 0:
4231 	default:
4232 		numchan = 1;
4233 		break;
4234 	case 1:
4235 		numchan = 2;
4236 		break;
4237 	case 2:
4238 		numchan = 4;
4239 		break;
4240 	case 3:
4241 		numchan = 8;
4242 		break;
4243 	case 4:
4244 		numchan = 3;
4245 		break;
4246 	case 5:
4247 		numchan = 6;
4248 		break;
4249 	case 6:
4250 		numchan = 10;
4251 		break;
4252 	case 7:
4253 		numchan = 12;
4254 		break;
4255 	case 8:
4256 		numchan = 16;
4257 		break;
4258 	}
4259 	rdev->mc.vram_width = numchan * chansize;
4260 	/* Could aper size report 0 ? */
4261 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4262 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4263 	/* size in MB on si */
4264 	tmp = RREG32(CONFIG_MEMSIZE);
4265 	/* some boards may have garbage in the upper 16 bits */
4266 	if (tmp & 0xffff0000) {
4267 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4268 		if (tmp & 0xffff)
4269 			tmp &= 0xffff;
4270 	}
4271 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4272 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4273 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4274 	si_vram_gtt_location(rdev, &rdev->mc);
4275 	radeon_update_bandwidth_info(rdev);
4276 
4277 	return 0;
4278 }
4279 
4280 /*
4281  * GART
4282  */
si_pcie_gart_tlb_flush(struct radeon_device * rdev)4283 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4284 {
4285 	/* flush hdp cache */
4286 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4287 
4288 	/* bits 0-15 are the VM contexts0-15 */
4289 	WREG32(VM_INVALIDATE_REQUEST, 1);
4290 }
4291 
si_pcie_gart_enable(struct radeon_device * rdev)4292 static int si_pcie_gart_enable(struct radeon_device *rdev)
4293 {
4294 	int r, i;
4295 
4296 	if (rdev->gart.robj == NULL) {
4297 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4298 		return -EINVAL;
4299 	}
4300 	r = radeon_gart_table_vram_pin(rdev);
4301 	if (r)
4302 		return r;
4303 	/* Setup TLB control */
4304 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4305 	       (0xA << 7) |
4306 	       ENABLE_L1_TLB |
4307 	       ENABLE_L1_FRAGMENT_PROCESSING |
4308 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4309 	       ENABLE_ADVANCED_DRIVER_MODEL |
4310 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4311 	/* Setup L2 cache */
4312 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4313 	       ENABLE_L2_FRAGMENT_PROCESSING |
4314 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4315 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4316 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4317 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4318 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4319 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4320 	       BANK_SELECT(4) |
4321 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4322 	/* setup context0 */
4323 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4324 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4325 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4326 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4327 			(u32)(rdev->dummy_page.addr >> 12));
4328 	WREG32(VM_CONTEXT0_CNTL2, 0);
4329 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4330 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4331 
4332 	WREG32(0x15D4, 0);
4333 	WREG32(0x15D8, 0);
4334 	WREG32(0x15DC, 0);
4335 
4336 	/* empty context1-15 */
4337 	/* set vm size, must be a multiple of 4 */
4338 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4339 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4340 	/* Assign the pt base to something valid for now; the pts used for
4341 	 * the VMs are determined by the application and setup and assigned
4342 	 * on the fly in the vm part of radeon_gart.c
4343 	 */
4344 	for (i = 1; i < 16; i++) {
4345 		if (i < 8)
4346 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4347 			       rdev->vm_manager.saved_table_addr[i]);
4348 		else
4349 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4350 			       rdev->vm_manager.saved_table_addr[i]);
4351 	}
4352 
4353 	/* enable context1-15 */
4354 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4355 	       (u32)(rdev->dummy_page.addr >> 12));
4356 	WREG32(VM_CONTEXT1_CNTL2, 4);
4357 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4358 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4359 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4360 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4361 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4362 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4363 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4364 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4365 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4366 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4367 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4368 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4369 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4370 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4371 
4372 	si_pcie_gart_tlb_flush(rdev);
4373 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4374 		 (unsigned)(rdev->mc.gtt_size >> 20),
4375 		 (unsigned long long)rdev->gart.table_addr);
4376 	rdev->gart.ready = true;
4377 	return 0;
4378 }
4379 
si_pcie_gart_disable(struct radeon_device * rdev)4380 static void si_pcie_gart_disable(struct radeon_device *rdev)
4381 {
4382 	unsigned i;
4383 
4384 	for (i = 1; i < 16; ++i) {
4385 		uint32_t reg;
4386 		if (i < 8)
4387 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4388 		else
4389 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4390 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4391 	}
4392 
4393 	/* Disable all tables */
4394 	WREG32(VM_CONTEXT0_CNTL, 0);
4395 	WREG32(VM_CONTEXT1_CNTL, 0);
4396 	/* Setup TLB control */
4397 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4398 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4399 	/* Setup L2 cache */
4400 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4401 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4402 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4403 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4404 	WREG32(VM_L2_CNTL2, 0);
4405 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4406 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4407 	radeon_gart_table_vram_unpin(rdev);
4408 }
4409 
si_pcie_gart_fini(struct radeon_device * rdev)4410 static void si_pcie_gart_fini(struct radeon_device *rdev)
4411 {
4412 	si_pcie_gart_disable(rdev);
4413 	radeon_gart_table_vram_free(rdev);
4414 	radeon_gart_fini(rdev);
4415 }
4416 
4417 /* vm parser */
si_vm_reg_valid(u32 reg)4418 static bool si_vm_reg_valid(u32 reg)
4419 {
4420 	/* context regs are fine */
4421 	if (reg >= 0x28000)
4422 		return true;
4423 
4424 	/* shader regs are also fine */
4425 	if (reg >= 0xB000 && reg < 0xC000)
4426 		return true;
4427 
4428 	/* check config regs */
4429 	switch (reg) {
4430 	case GRBM_GFX_INDEX:
4431 	case CP_STRMOUT_CNTL:
4432 	case VGT_VTX_VECT_EJECT_REG:
4433 	case VGT_CACHE_INVALIDATION:
4434 	case VGT_ESGS_RING_SIZE:
4435 	case VGT_GSVS_RING_SIZE:
4436 	case VGT_GS_VERTEX_REUSE:
4437 	case VGT_PRIMITIVE_TYPE:
4438 	case VGT_INDEX_TYPE:
4439 	case VGT_NUM_INDICES:
4440 	case VGT_NUM_INSTANCES:
4441 	case VGT_TF_RING_SIZE:
4442 	case VGT_HS_OFFCHIP_PARAM:
4443 	case VGT_TF_MEMORY_BASE:
4444 	case PA_CL_ENHANCE:
4445 	case PA_SU_LINE_STIPPLE_VALUE:
4446 	case PA_SC_LINE_STIPPLE_STATE:
4447 	case PA_SC_ENHANCE:
4448 	case SQC_CACHES:
4449 	case SPI_STATIC_THREAD_MGMT_1:
4450 	case SPI_STATIC_THREAD_MGMT_2:
4451 	case SPI_STATIC_THREAD_MGMT_3:
4452 	case SPI_PS_MAX_WAVE_ID:
4453 	case SPI_CONFIG_CNTL:
4454 	case SPI_CONFIG_CNTL_1:
4455 	case TA_CNTL_AUX:
4456 	case TA_CS_BC_BASE_ADDR:
4457 		return true;
4458 	default:
4459 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4460 		return false;
4461 	}
4462 }
4463 
si_vm_packet3_ce_check(struct radeon_device * rdev,u32 * ib,struct radeon_cs_packet * pkt)4464 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4465 				  u32 *ib, struct radeon_cs_packet *pkt)
4466 {
4467 	switch (pkt->opcode) {
4468 	case PACKET3_NOP:
4469 	case PACKET3_SET_BASE:
4470 	case PACKET3_SET_CE_DE_COUNTERS:
4471 	case PACKET3_LOAD_CONST_RAM:
4472 	case PACKET3_WRITE_CONST_RAM:
4473 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4474 	case PACKET3_DUMP_CONST_RAM:
4475 	case PACKET3_INCREMENT_CE_COUNTER:
4476 	case PACKET3_WAIT_ON_DE_COUNTER:
4477 	case PACKET3_CE_WRITE:
4478 		break;
4479 	default:
4480 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4481 		return -EINVAL;
4482 	}
4483 	return 0;
4484 }
4485 
si_vm_packet3_cp_dma_check(u32 * ib,u32 idx)4486 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4487 {
4488 	u32 start_reg, reg, i;
4489 	u32 command = ib[idx + 4];
4490 	u32 info = ib[idx + 1];
4491 	u32 idx_value = ib[idx];
4492 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4493 		/* src address space is register */
4494 		if (((info & 0x60000000) >> 29) == 0) {
4495 			start_reg = idx_value << 2;
4496 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4497 				reg = start_reg;
4498 				if (!si_vm_reg_valid(reg)) {
4499 					DRM_ERROR("CP DMA Bad SRC register\n");
4500 					return -EINVAL;
4501 				}
4502 			} else {
4503 				for (i = 0; i < (command & 0x1fffff); i++) {
4504 					reg = start_reg + (4 * i);
4505 					if (!si_vm_reg_valid(reg)) {
4506 						DRM_ERROR("CP DMA Bad SRC register\n");
4507 						return -EINVAL;
4508 					}
4509 				}
4510 			}
4511 		}
4512 	}
4513 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4514 		/* dst address space is register */
4515 		if (((info & 0x00300000) >> 20) == 0) {
4516 			start_reg = ib[idx + 2];
4517 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4518 				reg = start_reg;
4519 				if (!si_vm_reg_valid(reg)) {
4520 					DRM_ERROR("CP DMA Bad DST register\n");
4521 					return -EINVAL;
4522 				}
4523 			} else {
4524 				for (i = 0; i < (command & 0x1fffff); i++) {
4525 					reg = start_reg + (4 * i);
4526 				if (!si_vm_reg_valid(reg)) {
4527 						DRM_ERROR("CP DMA Bad DST register\n");
4528 						return -EINVAL;
4529 					}
4530 				}
4531 			}
4532 		}
4533 	}
4534 	return 0;
4535 }
4536 
si_vm_packet3_gfx_check(struct radeon_device * rdev,u32 * ib,struct radeon_cs_packet * pkt)4537 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4538 				   u32 *ib, struct radeon_cs_packet *pkt)
4539 {
4540 	int r;
4541 	u32 idx = pkt->idx + 1;
4542 	u32 idx_value = ib[idx];
4543 	u32 start_reg, end_reg, reg, i;
4544 
4545 	switch (pkt->opcode) {
4546 	case PACKET3_NOP:
4547 	case PACKET3_SET_BASE:
4548 	case PACKET3_CLEAR_STATE:
4549 	case PACKET3_INDEX_BUFFER_SIZE:
4550 	case PACKET3_DISPATCH_DIRECT:
4551 	case PACKET3_DISPATCH_INDIRECT:
4552 	case PACKET3_ALLOC_GDS:
4553 	case PACKET3_WRITE_GDS_RAM:
4554 	case PACKET3_ATOMIC_GDS:
4555 	case PACKET3_ATOMIC:
4556 	case PACKET3_OCCLUSION_QUERY:
4557 	case PACKET3_SET_PREDICATION:
4558 	case PACKET3_COND_EXEC:
4559 	case PACKET3_PRED_EXEC:
4560 	case PACKET3_DRAW_INDIRECT:
4561 	case PACKET3_DRAW_INDEX_INDIRECT:
4562 	case PACKET3_INDEX_BASE:
4563 	case PACKET3_DRAW_INDEX_2:
4564 	case PACKET3_CONTEXT_CONTROL:
4565 	case PACKET3_INDEX_TYPE:
4566 	case PACKET3_DRAW_INDIRECT_MULTI:
4567 	case PACKET3_DRAW_INDEX_AUTO:
4568 	case PACKET3_DRAW_INDEX_IMMD:
4569 	case PACKET3_NUM_INSTANCES:
4570 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4571 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4572 	case PACKET3_DRAW_INDEX_OFFSET_2:
4573 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4574 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4575 	case PACKET3_MPEG_INDEX:
4576 	case PACKET3_WAIT_REG_MEM:
4577 	case PACKET3_MEM_WRITE:
4578 	case PACKET3_PFP_SYNC_ME:
4579 	case PACKET3_SURFACE_SYNC:
4580 	case PACKET3_EVENT_WRITE:
4581 	case PACKET3_EVENT_WRITE_EOP:
4582 	case PACKET3_EVENT_WRITE_EOS:
4583 	case PACKET3_SET_CONTEXT_REG:
4584 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4585 	case PACKET3_SET_SH_REG:
4586 	case PACKET3_SET_SH_REG_OFFSET:
4587 	case PACKET3_INCREMENT_DE_COUNTER:
4588 	case PACKET3_WAIT_ON_CE_COUNTER:
4589 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4590 	case PACKET3_ME_WRITE:
4591 		break;
4592 	case PACKET3_COPY_DATA:
4593 		if ((idx_value & 0xf00) == 0) {
4594 			reg = ib[idx + 3] * 4;
4595 			if (!si_vm_reg_valid(reg))
4596 				return -EINVAL;
4597 		}
4598 		break;
4599 	case PACKET3_WRITE_DATA:
4600 		if ((idx_value & 0xf00) == 0) {
4601 			start_reg = ib[idx + 1] * 4;
4602 			if (idx_value & 0x10000) {
4603 				if (!si_vm_reg_valid(start_reg))
4604 					return -EINVAL;
4605 			} else {
4606 				for (i = 0; i < (pkt->count - 2); i++) {
4607 					reg = start_reg + (4 * i);
4608 					if (!si_vm_reg_valid(reg))
4609 						return -EINVAL;
4610 				}
4611 			}
4612 		}
4613 		break;
4614 	case PACKET3_COND_WRITE:
4615 		if (idx_value & 0x100) {
4616 			reg = ib[idx + 5] * 4;
4617 			if (!si_vm_reg_valid(reg))
4618 				return -EINVAL;
4619 		}
4620 		break;
4621 	case PACKET3_COPY_DW:
4622 		if (idx_value & 0x2) {
4623 			reg = ib[idx + 3] * 4;
4624 			if (!si_vm_reg_valid(reg))
4625 				return -EINVAL;
4626 		}
4627 		break;
4628 	case PACKET3_SET_CONFIG_REG:
4629 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4630 		end_reg = 4 * pkt->count + start_reg - 4;
4631 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4632 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4633 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4634 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4635 			return -EINVAL;
4636 		}
4637 		for (i = 0; i < pkt->count; i++) {
4638 			reg = start_reg + (4 * i);
4639 			if (!si_vm_reg_valid(reg))
4640 				return -EINVAL;
4641 		}
4642 		break;
4643 	case PACKET3_CP_DMA:
4644 		r = si_vm_packet3_cp_dma_check(ib, idx);
4645 		if (r)
4646 			return r;
4647 		break;
4648 	default:
4649 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4650 		return -EINVAL;
4651 	}
4652 	return 0;
4653 }
4654 
si_vm_packet3_compute_check(struct radeon_device * rdev,u32 * ib,struct radeon_cs_packet * pkt)4655 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4656 				       u32 *ib, struct radeon_cs_packet *pkt)
4657 {
4658 	int r;
4659 	u32 idx = pkt->idx + 1;
4660 	u32 idx_value = ib[idx];
4661 	u32 start_reg, reg, i;
4662 
4663 	switch (pkt->opcode) {
4664 	case PACKET3_NOP:
4665 	case PACKET3_SET_BASE:
4666 	case PACKET3_CLEAR_STATE:
4667 	case PACKET3_DISPATCH_DIRECT:
4668 	case PACKET3_DISPATCH_INDIRECT:
4669 	case PACKET3_ALLOC_GDS:
4670 	case PACKET3_WRITE_GDS_RAM:
4671 	case PACKET3_ATOMIC_GDS:
4672 	case PACKET3_ATOMIC:
4673 	case PACKET3_OCCLUSION_QUERY:
4674 	case PACKET3_SET_PREDICATION:
4675 	case PACKET3_COND_EXEC:
4676 	case PACKET3_PRED_EXEC:
4677 	case PACKET3_CONTEXT_CONTROL:
4678 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4679 	case PACKET3_WAIT_REG_MEM:
4680 	case PACKET3_MEM_WRITE:
4681 	case PACKET3_PFP_SYNC_ME:
4682 	case PACKET3_SURFACE_SYNC:
4683 	case PACKET3_EVENT_WRITE:
4684 	case PACKET3_EVENT_WRITE_EOP:
4685 	case PACKET3_EVENT_WRITE_EOS:
4686 	case PACKET3_SET_CONTEXT_REG:
4687 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4688 	case PACKET3_SET_SH_REG:
4689 	case PACKET3_SET_SH_REG_OFFSET:
4690 	case PACKET3_INCREMENT_DE_COUNTER:
4691 	case PACKET3_WAIT_ON_CE_COUNTER:
4692 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4693 	case PACKET3_ME_WRITE:
4694 		break;
4695 	case PACKET3_COPY_DATA:
4696 		if ((idx_value & 0xf00) == 0) {
4697 			reg = ib[idx + 3] * 4;
4698 			if (!si_vm_reg_valid(reg))
4699 				return -EINVAL;
4700 		}
4701 		break;
4702 	case PACKET3_WRITE_DATA:
4703 		if ((idx_value & 0xf00) == 0) {
4704 			start_reg = ib[idx + 1] * 4;
4705 			if (idx_value & 0x10000) {
4706 				if (!si_vm_reg_valid(start_reg))
4707 					return -EINVAL;
4708 			} else {
4709 				for (i = 0; i < (pkt->count - 2); i++) {
4710 					reg = start_reg + (4 * i);
4711 					if (!si_vm_reg_valid(reg))
4712 						return -EINVAL;
4713 				}
4714 			}
4715 		}
4716 		break;
4717 	case PACKET3_COND_WRITE:
4718 		if (idx_value & 0x100) {
4719 			reg = ib[idx + 5] * 4;
4720 			if (!si_vm_reg_valid(reg))
4721 				return -EINVAL;
4722 		}
4723 		break;
4724 	case PACKET3_COPY_DW:
4725 		if (idx_value & 0x2) {
4726 			reg = ib[idx + 3] * 4;
4727 			if (!si_vm_reg_valid(reg))
4728 				return -EINVAL;
4729 		}
4730 		break;
4731 	case PACKET3_CP_DMA:
4732 		r = si_vm_packet3_cp_dma_check(ib, idx);
4733 		if (r)
4734 			return r;
4735 		break;
4736 	default:
4737 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4738 		return -EINVAL;
4739 	}
4740 	return 0;
4741 }
4742 
si_ib_parse(struct radeon_device * rdev,struct radeon_ib * ib)4743 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4744 {
4745 	int ret = 0;
4746 	u32 idx = 0, i;
4747 	struct radeon_cs_packet pkt;
4748 
4749 	do {
4750 		pkt.idx = idx;
4751 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4752 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4753 		pkt.one_reg_wr = 0;
4754 		switch (pkt.type) {
4755 		case RADEON_PACKET_TYPE0:
4756 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4757 			ret = -EINVAL;
4758 			break;
4759 		case RADEON_PACKET_TYPE2:
4760 			idx += 1;
4761 			break;
4762 		case RADEON_PACKET_TYPE3:
4763 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4764 			if (ib->is_const_ib)
4765 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4766 			else {
4767 				switch (ib->ring) {
4768 				case RADEON_RING_TYPE_GFX_INDEX:
4769 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4770 					break;
4771 				case CAYMAN_RING_TYPE_CP1_INDEX:
4772 				case CAYMAN_RING_TYPE_CP2_INDEX:
4773 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4774 					break;
4775 				default:
4776 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4777 					ret = -EINVAL;
4778 					break;
4779 				}
4780 			}
4781 			idx += pkt.count + 2;
4782 			break;
4783 		default:
4784 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4785 			ret = -EINVAL;
4786 			break;
4787 		}
4788 		if (ret) {
4789 			for (i = 0; i < ib->length_dw; i++) {
4790 				if (i == idx)
4791 					printk("\t0x%08x <---\n", ib->ptr[i]);
4792 				else
4793 					printk("\t0x%08x\n", ib->ptr[i]);
4794 			}
4795 			break;
4796 		}
4797 	} while (idx < ib->length_dw);
4798 
4799 	return ret;
4800 }
4801 
4802 /*
4803  * vm
4804  */
si_vm_init(struct radeon_device * rdev)4805 int si_vm_init(struct radeon_device *rdev)
4806 {
4807 	/* number of VMs */
4808 	rdev->vm_manager.nvm = 16;
4809 	/* base offset of vram pages */
4810 	rdev->vm_manager.vram_base_offset = 0;
4811 
4812 	return 0;
4813 }
4814 
si_vm_fini(struct radeon_device * rdev)4815 void si_vm_fini(struct radeon_device *rdev)
4816 {
4817 }
4818 
4819 /**
4820  * si_vm_decode_fault - print human readable fault info
4821  *
4822  * @rdev: radeon_device pointer
4823  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4824  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4825  *
4826  * Print human readable fault information (SI).
4827  */
si_vm_decode_fault(struct radeon_device * rdev,u32 status,u32 addr)4828 static void si_vm_decode_fault(struct radeon_device *rdev,
4829 			       u32 status, u32 addr)
4830 {
4831 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4832 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4833 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4834 	const char *block;
4835 
4836 	if (rdev->family == CHIP_TAHITI) {
4837 		switch (mc_id) {
4838 		case 160:
4839 		case 144:
4840 		case 96:
4841 		case 80:
4842 		case 224:
4843 		case 208:
4844 		case 32:
4845 		case 16:
4846 			block = "CB";
4847 			break;
4848 		case 161:
4849 		case 145:
4850 		case 97:
4851 		case 81:
4852 		case 225:
4853 		case 209:
4854 		case 33:
4855 		case 17:
4856 			block = "CB_FMASK";
4857 			break;
4858 		case 162:
4859 		case 146:
4860 		case 98:
4861 		case 82:
4862 		case 226:
4863 		case 210:
4864 		case 34:
4865 		case 18:
4866 			block = "CB_CMASK";
4867 			break;
4868 		case 163:
4869 		case 147:
4870 		case 99:
4871 		case 83:
4872 		case 227:
4873 		case 211:
4874 		case 35:
4875 		case 19:
4876 			block = "CB_IMMED";
4877 			break;
4878 		case 164:
4879 		case 148:
4880 		case 100:
4881 		case 84:
4882 		case 228:
4883 		case 212:
4884 		case 36:
4885 		case 20:
4886 			block = "DB";
4887 			break;
4888 		case 165:
4889 		case 149:
4890 		case 101:
4891 		case 85:
4892 		case 229:
4893 		case 213:
4894 		case 37:
4895 		case 21:
4896 			block = "DB_HTILE";
4897 			break;
4898 		case 167:
4899 		case 151:
4900 		case 103:
4901 		case 87:
4902 		case 231:
4903 		case 215:
4904 		case 39:
4905 		case 23:
4906 			block = "DB_STEN";
4907 			break;
4908 		case 72:
4909 		case 68:
4910 		case 64:
4911 		case 8:
4912 		case 4:
4913 		case 0:
4914 		case 136:
4915 		case 132:
4916 		case 128:
4917 		case 200:
4918 		case 196:
4919 		case 192:
4920 			block = "TC";
4921 			break;
4922 		case 112:
4923 		case 48:
4924 			block = "CP";
4925 			break;
4926 		case 49:
4927 		case 177:
4928 		case 50:
4929 		case 178:
4930 			block = "SH";
4931 			break;
4932 		case 53:
4933 		case 190:
4934 			block = "VGT";
4935 			break;
4936 		case 117:
4937 			block = "IH";
4938 			break;
4939 		case 51:
4940 		case 115:
4941 			block = "RLC";
4942 			break;
4943 		case 119:
4944 		case 183:
4945 			block = "DMA0";
4946 			break;
4947 		case 61:
4948 			block = "DMA1";
4949 			break;
4950 		case 248:
4951 		case 120:
4952 			block = "HDP";
4953 			break;
4954 		default:
4955 			block = "unknown";
4956 			break;
4957 		}
4958 	} else {
4959 		switch (mc_id) {
4960 		case 32:
4961 		case 16:
4962 		case 96:
4963 		case 80:
4964 		case 160:
4965 		case 144:
4966 		case 224:
4967 		case 208:
4968 			block = "CB";
4969 			break;
4970 		case 33:
4971 		case 17:
4972 		case 97:
4973 		case 81:
4974 		case 161:
4975 		case 145:
4976 		case 225:
4977 		case 209:
4978 			block = "CB_FMASK";
4979 			break;
4980 		case 34:
4981 		case 18:
4982 		case 98:
4983 		case 82:
4984 		case 162:
4985 		case 146:
4986 		case 226:
4987 		case 210:
4988 			block = "CB_CMASK";
4989 			break;
4990 		case 35:
4991 		case 19:
4992 		case 99:
4993 		case 83:
4994 		case 163:
4995 		case 147:
4996 		case 227:
4997 		case 211:
4998 			block = "CB_IMMED";
4999 			break;
5000 		case 36:
5001 		case 20:
5002 		case 100:
5003 		case 84:
5004 		case 164:
5005 		case 148:
5006 		case 228:
5007 		case 212:
5008 			block = "DB";
5009 			break;
5010 		case 37:
5011 		case 21:
5012 		case 101:
5013 		case 85:
5014 		case 165:
5015 		case 149:
5016 		case 229:
5017 		case 213:
5018 			block = "DB_HTILE";
5019 			break;
5020 		case 39:
5021 		case 23:
5022 		case 103:
5023 		case 87:
5024 		case 167:
5025 		case 151:
5026 		case 231:
5027 		case 215:
5028 			block = "DB_STEN";
5029 			break;
5030 		case 72:
5031 		case 68:
5032 		case 8:
5033 		case 4:
5034 		case 136:
5035 		case 132:
5036 		case 200:
5037 		case 196:
5038 			block = "TC";
5039 			break;
5040 		case 112:
5041 		case 48:
5042 			block = "CP";
5043 			break;
5044 		case 49:
5045 		case 177:
5046 		case 50:
5047 		case 178:
5048 			block = "SH";
5049 			break;
5050 		case 53:
5051 			block = "VGT";
5052 			break;
5053 		case 117:
5054 			block = "IH";
5055 			break;
5056 		case 51:
5057 		case 115:
5058 			block = "RLC";
5059 			break;
5060 		case 119:
5061 		case 183:
5062 			block = "DMA0";
5063 			break;
5064 		case 61:
5065 			block = "DMA1";
5066 			break;
5067 		case 248:
5068 		case 120:
5069 			block = "HDP";
5070 			break;
5071 		default:
5072 			block = "unknown";
5073 			break;
5074 		}
5075 	}
5076 
5077 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5078 	       protections, vmid, addr,
5079 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5080 	       block, mc_id);
5081 }
5082 
si_vm_flush(struct radeon_device * rdev,struct radeon_ring * ring,unsigned vm_id,uint64_t pd_addr)5083 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5084 		 unsigned vm_id, uint64_t pd_addr)
5085 {
5086 	/* write new base address */
5087 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5088 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5089 				 WRITE_DATA_DST_SEL(0)));
5090 
5091 	if (vm_id < 8) {
5092 		radeon_ring_write(ring,
5093 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5094 	} else {
5095 		radeon_ring_write(ring,
5096 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5097 	}
5098 	radeon_ring_write(ring, 0);
5099 	radeon_ring_write(ring, pd_addr >> 12);
5100 
5101 	/* flush hdp cache */
5102 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5103 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5104 				 WRITE_DATA_DST_SEL(0)));
5105 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5106 	radeon_ring_write(ring, 0);
5107 	radeon_ring_write(ring, 0x1);
5108 
5109 	/* bits 0-15 are the VM contexts0-15 */
5110 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5111 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5112 				 WRITE_DATA_DST_SEL(0)));
5113 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5114 	radeon_ring_write(ring, 0);
5115 	radeon_ring_write(ring, 1 << vm_id);
5116 
5117 	/* wait for the invalidate to complete */
5118 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5119 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5120 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5121 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5122 	radeon_ring_write(ring, 0);
5123 	radeon_ring_write(ring, 0); /* ref */
5124 	radeon_ring_write(ring, 0); /* mask */
5125 	radeon_ring_write(ring, 0x20); /* poll interval */
5126 
5127 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5128 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5129 	radeon_ring_write(ring, 0x0);
5130 }
5131 
5132 /*
5133  *  Power and clock gating
5134  */
si_wait_for_rlc_serdes(struct radeon_device * rdev)5135 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5136 {
5137 	int i;
5138 
5139 	for (i = 0; i < rdev->usec_timeout; i++) {
5140 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5141 			break;
5142 		udelay(1);
5143 	}
5144 
5145 	for (i = 0; i < rdev->usec_timeout; i++) {
5146 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5147 			break;
5148 		udelay(1);
5149 	}
5150 }
5151 
si_enable_gui_idle_interrupt(struct radeon_device * rdev,bool enable)5152 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5153 					 bool enable)
5154 {
5155 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5156 	u32 mask;
5157 	int i;
5158 
5159 	if (enable)
5160 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5161 	else
5162 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5163 	WREG32(CP_INT_CNTL_RING0, tmp);
5164 
5165 	if (!enable) {
5166 		/* read a gfx register */
5167 		tmp = RREG32(DB_DEPTH_INFO);
5168 
5169 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5170 		for (i = 0; i < rdev->usec_timeout; i++) {
5171 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5172 				break;
5173 			udelay(1);
5174 		}
5175 	}
5176 }
5177 
si_set_uvd_dcm(struct radeon_device * rdev,bool sw_mode)5178 static void si_set_uvd_dcm(struct radeon_device *rdev,
5179 			   bool sw_mode)
5180 {
5181 	u32 tmp, tmp2;
5182 
5183 	tmp = RREG32(UVD_CGC_CTRL);
5184 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5185 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5186 
5187 	if (sw_mode) {
5188 		tmp &= ~0x7ffff800;
5189 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5190 	} else {
5191 		tmp |= 0x7ffff800;
5192 		tmp2 = 0;
5193 	}
5194 
5195 	WREG32(UVD_CGC_CTRL, tmp);
5196 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5197 }
5198 
si_init_uvd_internal_cg(struct radeon_device * rdev)5199 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5200 {
5201 	bool hw_mode = true;
5202 
5203 	if (hw_mode) {
5204 		si_set_uvd_dcm(rdev, false);
5205 	} else {
5206 		u32 tmp = RREG32(UVD_CGC_CTRL);
5207 		tmp &= ~DCM;
5208 		WREG32(UVD_CGC_CTRL, tmp);
5209 	}
5210 }
5211 
si_halt_rlc(struct radeon_device * rdev)5212 static u32 si_halt_rlc(struct radeon_device *rdev)
5213 {
5214 	u32 data, orig;
5215 
5216 	orig = data = RREG32(RLC_CNTL);
5217 
5218 	if (data & RLC_ENABLE) {
5219 		data &= ~RLC_ENABLE;
5220 		WREG32(RLC_CNTL, data);
5221 
5222 		si_wait_for_rlc_serdes(rdev);
5223 	}
5224 
5225 	return orig;
5226 }
5227 
si_update_rlc(struct radeon_device * rdev,u32 rlc)5228 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5229 {
5230 	u32 tmp;
5231 
5232 	tmp = RREG32(RLC_CNTL);
5233 	if (tmp != rlc)
5234 		WREG32(RLC_CNTL, rlc);
5235 }
5236 
si_enable_dma_pg(struct radeon_device * rdev,bool enable)5237 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5238 {
5239 	u32 data, orig;
5240 
5241 	orig = data = RREG32(DMA_PG);
5242 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5243 		data |= PG_CNTL_ENABLE;
5244 	else
5245 		data &= ~PG_CNTL_ENABLE;
5246 	if (orig != data)
5247 		WREG32(DMA_PG, data);
5248 }
5249 
si_init_dma_pg(struct radeon_device * rdev)5250 static void si_init_dma_pg(struct radeon_device *rdev)
5251 {
5252 	u32 tmp;
5253 
5254 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5255 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5256 
5257 	for (tmp = 0; tmp < 5; tmp++)
5258 		WREG32(DMA_PGFSM_WRITE, 0);
5259 }
5260 
si_enable_gfx_cgpg(struct radeon_device * rdev,bool enable)5261 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5262 			       bool enable)
5263 {
5264 	u32 tmp;
5265 
5266 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5267 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5268 		WREG32(RLC_TTOP_D, tmp);
5269 
5270 		tmp = RREG32(RLC_PG_CNTL);
5271 		tmp |= GFX_PG_ENABLE;
5272 		WREG32(RLC_PG_CNTL, tmp);
5273 
5274 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5275 		tmp |= AUTO_PG_EN;
5276 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5277 	} else {
5278 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5279 		tmp &= ~AUTO_PG_EN;
5280 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5281 
5282 		tmp = RREG32(DB_RENDER_CONTROL);
5283 	}
5284 }
5285 
si_init_gfx_cgpg(struct radeon_device * rdev)5286 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5287 {
5288 	u32 tmp;
5289 
5290 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5291 
5292 	tmp = RREG32(RLC_PG_CNTL);
5293 	tmp |= GFX_PG_SRC;
5294 	WREG32(RLC_PG_CNTL, tmp);
5295 
5296 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5297 
5298 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5299 
5300 	tmp &= ~GRBM_REG_SGIT_MASK;
5301 	tmp |= GRBM_REG_SGIT(0x700);
5302 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5303 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5304 }
5305 
si_get_cu_active_bitmap(struct radeon_device * rdev,u32 se,u32 sh)5306 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5307 {
5308 	u32 mask = 0, tmp, tmp1;
5309 	int i;
5310 
5311 	si_select_se_sh(rdev, se, sh);
5312 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5313 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5314 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5315 
5316 	tmp &= 0xffff0000;
5317 
5318 	tmp |= tmp1;
5319 	tmp >>= 16;
5320 
5321 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5322 		mask <<= 1;
5323 		mask |= 1;
5324 	}
5325 
5326 	return (~tmp) & mask;
5327 }
5328 
si_init_ao_cu_mask(struct radeon_device * rdev)5329 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5330 {
5331 	u32 i, j, k, active_cu_number = 0;
5332 	u32 mask, counter, cu_bitmap;
5333 	u32 tmp = 0;
5334 
5335 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5336 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5337 			mask = 1;
5338 			cu_bitmap = 0;
5339 			counter  = 0;
5340 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5341 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5342 					if (counter < 2)
5343 						cu_bitmap |= mask;
5344 					counter++;
5345 				}
5346 				mask <<= 1;
5347 			}
5348 
5349 			active_cu_number += counter;
5350 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5351 		}
5352 	}
5353 
5354 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5355 
5356 	tmp = RREG32(RLC_MAX_PG_CU);
5357 	tmp &= ~MAX_PU_CU_MASK;
5358 	tmp |= MAX_PU_CU(active_cu_number);
5359 	WREG32(RLC_MAX_PG_CU, tmp);
5360 }
5361 
si_enable_cgcg(struct radeon_device * rdev,bool enable)5362 static void si_enable_cgcg(struct radeon_device *rdev,
5363 			   bool enable)
5364 {
5365 	u32 data, orig, tmp;
5366 
5367 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5368 
5369 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5370 		si_enable_gui_idle_interrupt(rdev, true);
5371 
5372 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5373 
5374 		tmp = si_halt_rlc(rdev);
5375 
5376 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5377 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5378 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5379 
5380 		si_wait_for_rlc_serdes(rdev);
5381 
5382 		si_update_rlc(rdev, tmp);
5383 
5384 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5385 
5386 		data |= CGCG_EN | CGLS_EN;
5387 	} else {
5388 		si_enable_gui_idle_interrupt(rdev, false);
5389 
5390 		RREG32(CB_CGTT_SCLK_CTRL);
5391 		RREG32(CB_CGTT_SCLK_CTRL);
5392 		RREG32(CB_CGTT_SCLK_CTRL);
5393 		RREG32(CB_CGTT_SCLK_CTRL);
5394 
5395 		data &= ~(CGCG_EN | CGLS_EN);
5396 	}
5397 
5398 	if (orig != data)
5399 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5400 }
5401 
si_enable_mgcg(struct radeon_device * rdev,bool enable)5402 static void si_enable_mgcg(struct radeon_device *rdev,
5403 			   bool enable)
5404 {
5405 	u32 data, orig, tmp = 0;
5406 
5407 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5408 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5409 		data = 0x96940200;
5410 		if (orig != data)
5411 			WREG32(CGTS_SM_CTRL_REG, data);
5412 
5413 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5414 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5415 			data |= CP_MEM_LS_EN;
5416 			if (orig != data)
5417 				WREG32(CP_MEM_SLP_CNTL, data);
5418 		}
5419 
5420 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5421 		data &= 0xffffffc0;
5422 		if (orig != data)
5423 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5424 
5425 		tmp = si_halt_rlc(rdev);
5426 
5427 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5428 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5429 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5430 
5431 		si_update_rlc(rdev, tmp);
5432 	} else {
5433 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5434 		data |= 0x00000003;
5435 		if (orig != data)
5436 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5437 
5438 		data = RREG32(CP_MEM_SLP_CNTL);
5439 		if (data & CP_MEM_LS_EN) {
5440 			data &= ~CP_MEM_LS_EN;
5441 			WREG32(CP_MEM_SLP_CNTL, data);
5442 		}
5443 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5444 		data |= LS_OVERRIDE | OVERRIDE;
5445 		if (orig != data)
5446 			WREG32(CGTS_SM_CTRL_REG, data);
5447 
5448 		tmp = si_halt_rlc(rdev);
5449 
5450 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5451 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5452 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5453 
5454 		si_update_rlc(rdev, tmp);
5455 	}
5456 }
5457 
si_enable_uvd_mgcg(struct radeon_device * rdev,bool enable)5458 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5459 			       bool enable)
5460 {
5461 	u32 orig, data, tmp;
5462 
5463 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5464 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5465 		tmp |= 0x3fff;
5466 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5467 
5468 		orig = data = RREG32(UVD_CGC_CTRL);
5469 		data |= DCM;
5470 		if (orig != data)
5471 			WREG32(UVD_CGC_CTRL, data);
5472 
5473 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5474 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5475 	} else {
5476 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5477 		tmp &= ~0x3fff;
5478 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5479 
5480 		orig = data = RREG32(UVD_CGC_CTRL);
5481 		data &= ~DCM;
5482 		if (orig != data)
5483 			WREG32(UVD_CGC_CTRL, data);
5484 
5485 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5486 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5487 	}
5488 }
5489 
5490 static const u32 mc_cg_registers[] =
5491 {
5492 	MC_HUB_MISC_HUB_CG,
5493 	MC_HUB_MISC_SIP_CG,
5494 	MC_HUB_MISC_VM_CG,
5495 	MC_XPB_CLK_GAT,
5496 	ATC_MISC_CG,
5497 	MC_CITF_MISC_WR_CG,
5498 	MC_CITF_MISC_RD_CG,
5499 	MC_CITF_MISC_VM_CG,
5500 	VM_L2_CG,
5501 };
5502 
si_enable_mc_ls(struct radeon_device * rdev,bool enable)5503 static void si_enable_mc_ls(struct radeon_device *rdev,
5504 			    bool enable)
5505 {
5506 	int i;
5507 	u32 orig, data;
5508 
5509 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5510 		orig = data = RREG32(mc_cg_registers[i]);
5511 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5512 			data |= MC_LS_ENABLE;
5513 		else
5514 			data &= ~MC_LS_ENABLE;
5515 		if (data != orig)
5516 			WREG32(mc_cg_registers[i], data);
5517 	}
5518 }
5519 
si_enable_mc_mgcg(struct radeon_device * rdev,bool enable)5520 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5521 			       bool enable)
5522 {
5523 	int i;
5524 	u32 orig, data;
5525 
5526 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5527 		orig = data = RREG32(mc_cg_registers[i]);
5528 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5529 			data |= MC_CG_ENABLE;
5530 		else
5531 			data &= ~MC_CG_ENABLE;
5532 		if (data != orig)
5533 			WREG32(mc_cg_registers[i], data);
5534 	}
5535 }
5536 
si_enable_dma_mgcg(struct radeon_device * rdev,bool enable)5537 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5538 			       bool enable)
5539 {
5540 	u32 orig, data, offset;
5541 	int i;
5542 
5543 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5544 		for (i = 0; i < 2; i++) {
5545 			if (i == 0)
5546 				offset = DMA0_REGISTER_OFFSET;
5547 			else
5548 				offset = DMA1_REGISTER_OFFSET;
5549 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5550 			data &= ~MEM_POWER_OVERRIDE;
5551 			if (data != orig)
5552 				WREG32(DMA_POWER_CNTL + offset, data);
5553 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5554 		}
5555 	} else {
5556 		for (i = 0; i < 2; i++) {
5557 			if (i == 0)
5558 				offset = DMA0_REGISTER_OFFSET;
5559 			else
5560 				offset = DMA1_REGISTER_OFFSET;
5561 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5562 			data |= MEM_POWER_OVERRIDE;
5563 			if (data != orig)
5564 				WREG32(DMA_POWER_CNTL + offset, data);
5565 
5566 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5567 			data = 0xff000000;
5568 			if (data != orig)
5569 				WREG32(DMA_CLK_CTRL + offset, data);
5570 		}
5571 	}
5572 }
5573 
si_enable_bif_mgls(struct radeon_device * rdev,bool enable)5574 static void si_enable_bif_mgls(struct radeon_device *rdev,
5575 			       bool enable)
5576 {
5577 	u32 orig, data;
5578 
5579 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5580 
5581 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5582 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5583 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5584 	else
5585 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5586 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5587 
5588 	if (orig != data)
5589 		WREG32_PCIE(PCIE_CNTL2, data);
5590 }
5591 
si_enable_hdp_mgcg(struct radeon_device * rdev,bool enable)5592 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5593 			       bool enable)
5594 {
5595 	u32 orig, data;
5596 
5597 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5598 
5599 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5600 		data &= ~CLOCK_GATING_DIS;
5601 	else
5602 		data |= CLOCK_GATING_DIS;
5603 
5604 	if (orig != data)
5605 		WREG32(HDP_HOST_PATH_CNTL, data);
5606 }
5607 
si_enable_hdp_ls(struct radeon_device * rdev,bool enable)5608 static void si_enable_hdp_ls(struct radeon_device *rdev,
5609 			     bool enable)
5610 {
5611 	u32 orig, data;
5612 
5613 	orig = data = RREG32(HDP_MEM_POWER_LS);
5614 
5615 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5616 		data |= HDP_LS_ENABLE;
5617 	else
5618 		data &= ~HDP_LS_ENABLE;
5619 
5620 	if (orig != data)
5621 		WREG32(HDP_MEM_POWER_LS, data);
5622 }
5623 
si_update_cg(struct radeon_device * rdev,u32 block,bool enable)5624 static void si_update_cg(struct radeon_device *rdev,
5625 			 u32 block, bool enable)
5626 {
5627 	if (block & RADEON_CG_BLOCK_GFX) {
5628 		si_enable_gui_idle_interrupt(rdev, false);
5629 		/* order matters! */
5630 		if (enable) {
5631 			si_enable_mgcg(rdev, true);
5632 			si_enable_cgcg(rdev, true);
5633 		} else {
5634 			si_enable_cgcg(rdev, false);
5635 			si_enable_mgcg(rdev, false);
5636 		}
5637 		si_enable_gui_idle_interrupt(rdev, true);
5638 	}
5639 
5640 	if (block & RADEON_CG_BLOCK_MC) {
5641 		si_enable_mc_mgcg(rdev, enable);
5642 		si_enable_mc_ls(rdev, enable);
5643 	}
5644 
5645 	if (block & RADEON_CG_BLOCK_SDMA) {
5646 		si_enable_dma_mgcg(rdev, enable);
5647 	}
5648 
5649 	if (block & RADEON_CG_BLOCK_BIF) {
5650 		si_enable_bif_mgls(rdev, enable);
5651 	}
5652 
5653 	if (block & RADEON_CG_BLOCK_UVD) {
5654 		if (rdev->has_uvd) {
5655 			si_enable_uvd_mgcg(rdev, enable);
5656 		}
5657 	}
5658 
5659 	if (block & RADEON_CG_BLOCK_HDP) {
5660 		si_enable_hdp_mgcg(rdev, enable);
5661 		si_enable_hdp_ls(rdev, enable);
5662 	}
5663 }
5664 
si_init_cg(struct radeon_device * rdev)5665 static void si_init_cg(struct radeon_device *rdev)
5666 {
5667 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5668 			    RADEON_CG_BLOCK_MC |
5669 			    RADEON_CG_BLOCK_SDMA |
5670 			    RADEON_CG_BLOCK_BIF |
5671 			    RADEON_CG_BLOCK_HDP), true);
5672 	if (rdev->has_uvd) {
5673 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5674 		si_init_uvd_internal_cg(rdev);
5675 	}
5676 }
5677 
si_fini_cg(struct radeon_device * rdev)5678 static void si_fini_cg(struct radeon_device *rdev)
5679 {
5680 	if (rdev->has_uvd) {
5681 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5682 	}
5683 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5684 			    RADEON_CG_BLOCK_MC |
5685 			    RADEON_CG_BLOCK_SDMA |
5686 			    RADEON_CG_BLOCK_BIF |
5687 			    RADEON_CG_BLOCK_HDP), false);
5688 }
5689 
si_get_csb_size(struct radeon_device * rdev)5690 u32 si_get_csb_size(struct radeon_device *rdev)
5691 {
5692 	u32 count = 0;
5693 	const struct cs_section_def *sect = NULL;
5694 	const struct cs_extent_def *ext = NULL;
5695 
5696 	if (rdev->rlc.cs_data == NULL)
5697 		return 0;
5698 
5699 	/* begin clear state */
5700 	count += 2;
5701 	/* context control state */
5702 	count += 3;
5703 
5704 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5705 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5706 			if (sect->id == SECT_CONTEXT)
5707 				count += 2 + ext->reg_count;
5708 			else
5709 				return 0;
5710 		}
5711 	}
5712 	/* pa_sc_raster_config */
5713 	count += 3;
5714 	/* end clear state */
5715 	count += 2;
5716 	/* clear state */
5717 	count += 2;
5718 
5719 	return count;
5720 }
5721 
si_get_csb_buffer(struct radeon_device * rdev,volatile u32 * buffer)5722 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5723 {
5724 	u32 count = 0, i;
5725 	const struct cs_section_def *sect = NULL;
5726 	const struct cs_extent_def *ext = NULL;
5727 
5728 	if (rdev->rlc.cs_data == NULL)
5729 		return;
5730 	if (buffer == NULL)
5731 		return;
5732 
5733 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5734 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5735 
5736 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5737 	buffer[count++] = cpu_to_le32(0x80000000);
5738 	buffer[count++] = cpu_to_le32(0x80000000);
5739 
5740 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5741 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5742 			if (sect->id == SECT_CONTEXT) {
5743 				buffer[count++] =
5744 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5745 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5746 				for (i = 0; i < ext->reg_count; i++)
5747 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5748 			} else {
5749 				return;
5750 			}
5751 		}
5752 	}
5753 
5754 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5755 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5756 	switch (rdev->family) {
5757 	case CHIP_TAHITI:
5758 	case CHIP_PITCAIRN:
5759 		buffer[count++] = cpu_to_le32(0x2a00126a);
5760 		break;
5761 	case CHIP_VERDE:
5762 		buffer[count++] = cpu_to_le32(0x0000124a);
5763 		break;
5764 	case CHIP_OLAND:
5765 		buffer[count++] = cpu_to_le32(0x00000082);
5766 		break;
5767 	case CHIP_HAINAN:
5768 		buffer[count++] = cpu_to_le32(0x00000000);
5769 		break;
5770 	default:
5771 		buffer[count++] = cpu_to_le32(0x00000000);
5772 		break;
5773 	}
5774 
5775 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5776 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5777 
5778 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5779 	buffer[count++] = cpu_to_le32(0);
5780 }
5781 
si_init_pg(struct radeon_device * rdev)5782 static void si_init_pg(struct radeon_device *rdev)
5783 {
5784 	if (rdev->pg_flags) {
5785 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5786 			si_init_dma_pg(rdev);
5787 		}
5788 		si_init_ao_cu_mask(rdev);
5789 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5790 			si_init_gfx_cgpg(rdev);
5791 		} else {
5792 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5793 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5794 		}
5795 		si_enable_dma_pg(rdev, true);
5796 		si_enable_gfx_cgpg(rdev, true);
5797 	} else {
5798 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5799 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5800 	}
5801 }
5802 
si_fini_pg(struct radeon_device * rdev)5803 static void si_fini_pg(struct radeon_device *rdev)
5804 {
5805 	if (rdev->pg_flags) {
5806 		si_enable_dma_pg(rdev, false);
5807 		si_enable_gfx_cgpg(rdev, false);
5808 	}
5809 }
5810 
5811 /*
5812  * RLC
5813  */
si_rlc_reset(struct radeon_device * rdev)5814 void si_rlc_reset(struct radeon_device *rdev)
5815 {
5816 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5817 
5818 	tmp |= SOFT_RESET_RLC;
5819 	WREG32(GRBM_SOFT_RESET, tmp);
5820 	udelay(50);
5821 	tmp &= ~SOFT_RESET_RLC;
5822 	WREG32(GRBM_SOFT_RESET, tmp);
5823 	udelay(50);
5824 }
5825 
si_rlc_stop(struct radeon_device * rdev)5826 static void si_rlc_stop(struct radeon_device *rdev)
5827 {
5828 	WREG32(RLC_CNTL, 0);
5829 
5830 	si_enable_gui_idle_interrupt(rdev, false);
5831 
5832 	si_wait_for_rlc_serdes(rdev);
5833 }
5834 
si_rlc_start(struct radeon_device * rdev)5835 static void si_rlc_start(struct radeon_device *rdev)
5836 {
5837 	WREG32(RLC_CNTL, RLC_ENABLE);
5838 
5839 	si_enable_gui_idle_interrupt(rdev, true);
5840 
5841 	udelay(50);
5842 }
5843 
si_lbpw_supported(struct radeon_device * rdev)5844 static bool si_lbpw_supported(struct radeon_device *rdev)
5845 {
5846 	u32 tmp;
5847 
5848 	/* Enable LBPW only for DDR3 */
5849 	tmp = RREG32(MC_SEQ_MISC0);
5850 	if ((tmp & 0xF0000000) == 0xB0000000)
5851 		return true;
5852 	return false;
5853 }
5854 
si_enable_lbpw(struct radeon_device * rdev,bool enable)5855 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5856 {
5857 	u32 tmp;
5858 
5859 	tmp = RREG32(RLC_LB_CNTL);
5860 	if (enable)
5861 		tmp |= LOAD_BALANCE_ENABLE;
5862 	else
5863 		tmp &= ~LOAD_BALANCE_ENABLE;
5864 	WREG32(RLC_LB_CNTL, tmp);
5865 
5866 	if (!enable) {
5867 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5868 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5869 	}
5870 }
5871 
si_rlc_resume(struct radeon_device * rdev)5872 static int si_rlc_resume(struct radeon_device *rdev)
5873 {
5874 	u32 i;
5875 
5876 	if (!rdev->rlc_fw)
5877 		return -EINVAL;
5878 
5879 	si_rlc_stop(rdev);
5880 
5881 	si_rlc_reset(rdev);
5882 
5883 	si_init_pg(rdev);
5884 
5885 	si_init_cg(rdev);
5886 
5887 	WREG32(RLC_RL_BASE, 0);
5888 	WREG32(RLC_RL_SIZE, 0);
5889 	WREG32(RLC_LB_CNTL, 0);
5890 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5891 	WREG32(RLC_LB_CNTR_INIT, 0);
5892 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5893 
5894 	WREG32(RLC_MC_CNTL, 0);
5895 	WREG32(RLC_UCODE_CNTL, 0);
5896 
5897 	if (rdev->new_fw) {
5898 		const struct rlc_firmware_header_v1_0 *hdr =
5899 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5900 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5901 		const __le32 *fw_data = (const __le32 *)
5902 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5903 
5904 		radeon_ucode_print_rlc_hdr(&hdr->header);
5905 
5906 		for (i = 0; i < fw_size; i++) {
5907 			WREG32(RLC_UCODE_ADDR, i);
5908 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5909 		}
5910 	} else {
5911 		const __be32 *fw_data =
5912 			(const __be32 *)rdev->rlc_fw->data;
5913 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5914 			WREG32(RLC_UCODE_ADDR, i);
5915 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5916 		}
5917 	}
5918 	WREG32(RLC_UCODE_ADDR, 0);
5919 
5920 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5921 
5922 	si_rlc_start(rdev);
5923 
5924 	return 0;
5925 }
5926 
si_enable_interrupts(struct radeon_device * rdev)5927 static void si_enable_interrupts(struct radeon_device *rdev)
5928 {
5929 	u32 ih_cntl = RREG32(IH_CNTL);
5930 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5931 
5932 	ih_cntl |= ENABLE_INTR;
5933 	ih_rb_cntl |= IH_RB_ENABLE;
5934 	WREG32(IH_CNTL, ih_cntl);
5935 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5936 	rdev->ih.enabled = true;
5937 }
5938 
si_disable_interrupts(struct radeon_device * rdev)5939 static void si_disable_interrupts(struct radeon_device *rdev)
5940 {
5941 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5942 	u32 ih_cntl = RREG32(IH_CNTL);
5943 
5944 	ih_rb_cntl &= ~IH_RB_ENABLE;
5945 	ih_cntl &= ~ENABLE_INTR;
5946 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5947 	WREG32(IH_CNTL, ih_cntl);
5948 	/* set rptr, wptr to 0 */
5949 	WREG32(IH_RB_RPTR, 0);
5950 	WREG32(IH_RB_WPTR, 0);
5951 	rdev->ih.enabled = false;
5952 	rdev->ih.rptr = 0;
5953 }
5954 
si_disable_interrupt_state(struct radeon_device * rdev)5955 static void si_disable_interrupt_state(struct radeon_device *rdev)
5956 {
5957 	int i;
5958 	u32 tmp;
5959 
5960 	tmp = RREG32(CP_INT_CNTL_RING0) &
5961 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5962 	WREG32(CP_INT_CNTL_RING0, tmp);
5963 	WREG32(CP_INT_CNTL_RING1, 0);
5964 	WREG32(CP_INT_CNTL_RING2, 0);
5965 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5966 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5967 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5968 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5969 	WREG32(GRBM_INT_CNTL, 0);
5970 	WREG32(SRBM_INT_CNTL, 0);
5971 	for (i = 0; i < rdev->num_crtc; i++)
5972 		WREG32(INT_MASK + crtc_offsets[i], 0);
5973 	for (i = 0; i < rdev->num_crtc; i++)
5974 		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], 0);
5975 
5976 	if (!ASIC_IS_NODCE(rdev)) {
5977 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5978 
5979 		for (i = 0; i < 6; i++)
5980 			WREG32_AND(DC_HPDx_INT_CONTROL(i),
5981 				   DC_HPDx_INT_POLARITY);
5982 	}
5983 }
5984 
si_irq_init(struct radeon_device * rdev)5985 static int si_irq_init(struct radeon_device *rdev)
5986 {
5987 	int ret = 0;
5988 	int rb_bufsz;
5989 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5990 
5991 	/* allocate ring */
5992 	ret = r600_ih_ring_alloc(rdev);
5993 	if (ret)
5994 		return ret;
5995 
5996 	/* disable irqs */
5997 	si_disable_interrupts(rdev);
5998 
5999 	/* init rlc */
6000 	ret = si_rlc_resume(rdev);
6001 	if (ret) {
6002 		r600_ih_ring_fini(rdev);
6003 		return ret;
6004 	}
6005 
6006 	/* setup interrupt control */
6007 	/* set dummy read address to dummy page address */
6008 	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6009 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6010 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6011 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6012 	 */
6013 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6014 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6015 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6016 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6017 
6018 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6019 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6020 
6021 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6022 		      IH_WPTR_OVERFLOW_CLEAR |
6023 		      (rb_bufsz << 1));
6024 
6025 	if (rdev->wb.enabled)
6026 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6027 
6028 	/* set the writeback address whether it's enabled or not */
6029 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6030 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6031 
6032 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6033 
6034 	/* set rptr, wptr to 0 */
6035 	WREG32(IH_RB_RPTR, 0);
6036 	WREG32(IH_RB_WPTR, 0);
6037 
6038 	/* Default settings for IH_CNTL (disabled at first) */
6039 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6040 	/* RPTR_REARM only works if msi's are enabled */
6041 	if (rdev->msi_enabled)
6042 		ih_cntl |= RPTR_REARM;
6043 	WREG32(IH_CNTL, ih_cntl);
6044 
6045 	/* force the active interrupt state to all disabled */
6046 	si_disable_interrupt_state(rdev);
6047 
6048 	pci_set_master(rdev->pdev);
6049 
6050 	/* enable irqs */
6051 	si_enable_interrupts(rdev);
6052 
6053 	return ret;
6054 }
6055 
6056 /* The order we write back each register here is important */
si_irq_set(struct radeon_device * rdev)6057 int si_irq_set(struct radeon_device *rdev)
6058 {
6059 	int i;
6060 	u32 cp_int_cntl;
6061 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6062 	u32 grbm_int_cntl = 0;
6063 	u32 dma_cntl, dma_cntl1;
6064 	u32 thermal_int = 0;
6065 
6066 	if (!rdev->irq.installed) {
6067 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6068 		return -EINVAL;
6069 	}
6070 	/* don't enable anything if the ih is disabled */
6071 	if (!rdev->ih.enabled) {
6072 		si_disable_interrupts(rdev);
6073 		/* force the active interrupt state to all disabled */
6074 		si_disable_interrupt_state(rdev);
6075 		return 0;
6076 	}
6077 
6078 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6079 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6080 
6081 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6082 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6083 
6084 	thermal_int = RREG32(CG_THERMAL_INT) &
6085 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6086 
6087 	/* enable CP interrupts on all rings */
6088 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6089 		DRM_DEBUG("si_irq_set: sw int gfx\n");
6090 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6091 	}
6092 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6093 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6094 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6095 	}
6096 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6097 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6098 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6099 	}
6100 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6101 		DRM_DEBUG("si_irq_set: sw int dma\n");
6102 		dma_cntl |= TRAP_ENABLE;
6103 	}
6104 
6105 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6106 		DRM_DEBUG("si_irq_set: sw int dma1\n");
6107 		dma_cntl1 |= TRAP_ENABLE;
6108 	}
6109 
6110 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6111 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6112 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6113 
6114 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6115 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6116 
6117 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6118 
6119 	if (rdev->irq.dpm_thermal) {
6120 		DRM_DEBUG("dpm thermal\n");
6121 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6122 	}
6123 
6124 	for (i = 0; i < rdev->num_crtc; i++) {
6125 		radeon_irq_kms_set_irq_n_enabled(
6126 		    rdev, INT_MASK + crtc_offsets[i], VBLANK_INT_MASK,
6127 		    rdev->irq.crtc_vblank_int[i] ||
6128 		    atomic_read(&rdev->irq.pflip[i]), "vblank", i);
6129 	}
6130 
6131 	for (i = 0; i < rdev->num_crtc; i++)
6132 		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], GRPH_PFLIP_INT_MASK);
6133 
6134 	if (!ASIC_IS_NODCE(rdev)) {
6135 		for (i = 0; i < 6; i++) {
6136 			radeon_irq_kms_set_irq_n_enabled(
6137 			    rdev, DC_HPDx_INT_CONTROL(i),
6138 			    DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN,
6139 			    rdev->irq.hpd[i], "HPD", i);
6140 		}
6141 	}
6142 
6143 	WREG32(CG_THERMAL_INT, thermal_int);
6144 
6145 	/* posting read */
6146 	RREG32(SRBM_STATUS);
6147 
6148 	return 0;
6149 }
6150 
6151 /* The order we write back each register here is important */
si_irq_ack(struct radeon_device * rdev)6152 static inline void si_irq_ack(struct radeon_device *rdev)
6153 {
6154 	int i, j;
6155 	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6156 	u32 *grph_int = rdev->irq.stat_regs.evergreen.grph_int;
6157 
6158 	if (ASIC_IS_NODCE(rdev))
6159 		return;
6160 
6161 	for (i = 0; i < 6; i++) {
6162 		disp_int[i] = RREG32(si_disp_int_status[i]);
6163 		if (i < rdev->num_crtc)
6164 			grph_int[i] = RREG32(GRPH_INT_STATUS + crtc_offsets[i]);
6165 	}
6166 
6167 	/* We write back each interrupt register in pairs of two */
6168 	for (i = 0; i < rdev->num_crtc; i += 2) {
6169 		for (j = i; j < (i + 2); j++) {
6170 			if (grph_int[j] & GRPH_PFLIP_INT_OCCURRED)
6171 				WREG32(GRPH_INT_STATUS + crtc_offsets[j],
6172 				       GRPH_PFLIP_INT_CLEAR);
6173 		}
6174 
6175 		for (j = i; j < (i + 2); j++) {
6176 			if (disp_int[j] & LB_D1_VBLANK_INTERRUPT)
6177 				WREG32(VBLANK_STATUS + crtc_offsets[j],
6178 				       VBLANK_ACK);
6179 			if (disp_int[j] & LB_D1_VLINE_INTERRUPT)
6180 				WREG32(VLINE_STATUS + crtc_offsets[j],
6181 				       VLINE_ACK);
6182 		}
6183 	}
6184 
6185 	for (i = 0; i < 6; i++) {
6186 		if (disp_int[i] & DC_HPD1_INTERRUPT)
6187 			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_INT_ACK);
6188 	}
6189 
6190 	for (i = 0; i < 6; i++) {
6191 		if (disp_int[i] & DC_HPD1_RX_INTERRUPT)
6192 			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_RX_INT_ACK);
6193 	}
6194 }
6195 
si_irq_disable(struct radeon_device * rdev)6196 static void si_irq_disable(struct radeon_device *rdev)
6197 {
6198 	si_disable_interrupts(rdev);
6199 	/* Wait and acknowledge irq */
6200 	mdelay(1);
6201 	si_irq_ack(rdev);
6202 	si_disable_interrupt_state(rdev);
6203 }
6204 
si_irq_suspend(struct radeon_device * rdev)6205 static void si_irq_suspend(struct radeon_device *rdev)
6206 {
6207 	si_irq_disable(rdev);
6208 	si_rlc_stop(rdev);
6209 }
6210 
si_irq_fini(struct radeon_device * rdev)6211 static void si_irq_fini(struct radeon_device *rdev)
6212 {
6213 	si_irq_suspend(rdev);
6214 	r600_ih_ring_fini(rdev);
6215 }
6216 
si_get_ih_wptr(struct radeon_device * rdev)6217 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6218 {
6219 	u32 wptr, tmp;
6220 
6221 	if (rdev->wb.enabled)
6222 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6223 	else
6224 		wptr = RREG32(IH_RB_WPTR);
6225 
6226 	if (wptr & RB_OVERFLOW) {
6227 		wptr &= ~RB_OVERFLOW;
6228 		/* When a ring buffer overflow happen start parsing interrupt
6229 		 * from the last not overwritten vector (wptr + 16). Hopefully
6230 		 * this should allow us to catchup.
6231 		 */
6232 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6233 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6234 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6235 		tmp = RREG32(IH_RB_CNTL);
6236 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6237 		WREG32(IH_RB_CNTL, tmp);
6238 	}
6239 	return (wptr & rdev->ih.ptr_mask);
6240 }
6241 
6242 /*        SI IV Ring
6243  * Each IV ring entry is 128 bits:
6244  * [7:0]    - interrupt source id
6245  * [31:8]   - reserved
6246  * [59:32]  - interrupt source data
6247  * [63:60]  - reserved
6248  * [71:64]  - RINGID
6249  * [79:72]  - VMID
6250  * [127:80] - reserved
6251  */
si_irq_process(struct radeon_device * rdev)6252 int si_irq_process(struct radeon_device *rdev)
6253 {
6254 	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6255 	u32 crtc_idx, hpd_idx;
6256 	u32 mask;
6257 	u32 wptr;
6258 	u32 rptr;
6259 	u32 src_id, src_data, ring_id;
6260 	u32 ring_index;
6261 	bool queue_hotplug = false;
6262 	bool queue_dp = false;
6263 	bool queue_thermal = false;
6264 	u32 status, addr;
6265 	const char *event_name;
6266 
6267 	if (!rdev->ih.enabled || rdev->shutdown)
6268 		return IRQ_NONE;
6269 
6270 	wptr = si_get_ih_wptr(rdev);
6271 
6272 restart_ih:
6273 	/* is somebody else already processing irqs? */
6274 	if (atomic_xchg(&rdev->ih.lock, 1))
6275 		return IRQ_NONE;
6276 
6277 	rptr = rdev->ih.rptr;
6278 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6279 
6280 	/* Order reading of wptr vs. reading of IH ring data */
6281 	rmb();
6282 
6283 	/* display interrupts */
6284 	si_irq_ack(rdev);
6285 
6286 	while (rptr != wptr) {
6287 		/* wptr/rptr are in bytes! */
6288 		ring_index = rptr / 4;
6289 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6290 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6291 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6292 
6293 		switch (src_id) {
6294 		case 1: /* D1 vblank/vline */
6295 		case 2: /* D2 vblank/vline */
6296 		case 3: /* D3 vblank/vline */
6297 		case 4: /* D4 vblank/vline */
6298 		case 5: /* D5 vblank/vline */
6299 		case 6: /* D6 vblank/vline */
6300 			crtc_idx = src_id - 1;
6301 
6302 			if (src_data == 0) { /* vblank */
6303 				mask = LB_D1_VBLANK_INTERRUPT;
6304 				event_name = "vblank";
6305 
6306 				if (rdev->irq.crtc_vblank_int[crtc_idx]) {
6307 					drm_handle_vblank(rdev->ddev, crtc_idx);
6308 #ifdef __NetBSD__
6309 					spin_lock(&rdev->irq.vblank_lock);
6310 					rdev->pm.vblank_sync = true;
6311 					DRM_SPIN_WAKEUP_ONE(&rdev->irq.vblank_queue, &rdev->irq.vblank_lock);
6312 					spin_unlock(&rdev->irq.vblank_lock);
6313 #else
6314 					rdev->pm.vblank_sync = true;
6315 					wake_up(&rdev->irq.vblank_queue);
6316 #endif
6317 				}
6318 				if (atomic_read(&rdev->irq.pflip[crtc_idx])) {
6319 					radeon_crtc_handle_vblank(rdev,
6320 								  crtc_idx);
6321 				}
6322 
6323 			} else if (src_data == 1) { /* vline */
6324 				mask = LB_D1_VLINE_INTERRUPT;
6325 				event_name = "vline";
6326 			} else {
6327 				DRM_DEBUG("Unhandled interrupt: %d %d\n",
6328 					  src_id, src_data);
6329 				break;
6330 			}
6331 
6332 			if (!(disp_int[crtc_idx] & mask)) {
6333 				DRM_DEBUG("IH: D%d %s - IH event w/o asserted irq bit?\n",
6334 					  crtc_idx + 1, event_name);
6335 			}
6336 
6337 			disp_int[crtc_idx] &= ~mask;
6338 			DRM_DEBUG("IH: D%d %s\n", crtc_idx + 1, event_name);
6339 
6340 			break;
6341 		case 8: /* D1 page flip */
6342 		case 10: /* D2 page flip */
6343 		case 12: /* D3 page flip */
6344 		case 14: /* D4 page flip */
6345 		case 16: /* D5 page flip */
6346 		case 18: /* D6 page flip */
6347 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6348 			if (radeon_use_pflipirq > 0)
6349 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6350 			break;
6351 		case 42: /* HPD hotplug */
6352 			if (src_data <= 5) {
6353 				hpd_idx = src_data;
6354 				mask = DC_HPD1_INTERRUPT;
6355 				queue_hotplug = true;
6356 				event_name = "HPD";
6357 
6358 			} else if (src_data <= 11) {
6359 				hpd_idx = src_data - 6;
6360 				mask = DC_HPD1_RX_INTERRUPT;
6361 				queue_dp = true;
6362 				event_name = "HPD_RX";
6363 
6364 			} else {
6365 				DRM_DEBUG("Unhandled interrupt: %d %d\n",
6366 					  src_id, src_data);
6367 				break;
6368 			}
6369 
6370 			if (!(disp_int[hpd_idx] & mask))
6371 				DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6372 
6373 			disp_int[hpd_idx] &= ~mask;
6374 			DRM_DEBUG("IH: %s%d\n", event_name, hpd_idx + 1);
6375 			break;
6376 		case 96:
6377 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6378 			WREG32(SRBM_INT_ACK, 0x1);
6379 			break;
6380 		case 124: /* UVD */
6381 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6382 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6383 			break;
6384 		case 146:
6385 		case 147:
6386 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6387 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6388 			/* reset addr and status */
6389 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6390 			if (addr == 0x0 && status == 0x0)
6391 				break;
6392 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6393 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6394 				addr);
6395 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6396 				status);
6397 			si_vm_decode_fault(rdev, status, addr);
6398 			break;
6399 		case 176: /* RINGID0 CP_INT */
6400 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6401 			break;
6402 		case 177: /* RINGID1 CP_INT */
6403 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6404 			break;
6405 		case 178: /* RINGID2 CP_INT */
6406 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6407 			break;
6408 		case 181: /* CP EOP event */
6409 			DRM_DEBUG("IH: CP EOP\n");
6410 			switch (ring_id) {
6411 			case 0:
6412 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6413 				break;
6414 			case 1:
6415 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6416 				break;
6417 			case 2:
6418 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6419 				break;
6420 			}
6421 			break;
6422 		case 224: /* DMA trap event */
6423 			DRM_DEBUG("IH: DMA trap\n");
6424 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6425 			break;
6426 		case 230: /* thermal low to high */
6427 			DRM_DEBUG("IH: thermal low to high\n");
6428 			rdev->pm.dpm.thermal.high_to_low = false;
6429 			queue_thermal = true;
6430 			break;
6431 		case 231: /* thermal high to low */
6432 			DRM_DEBUG("IH: thermal high to low\n");
6433 			rdev->pm.dpm.thermal.high_to_low = true;
6434 			queue_thermal = true;
6435 			break;
6436 		case 233: /* GUI IDLE */
6437 			DRM_DEBUG("IH: GUI idle\n");
6438 			break;
6439 		case 244: /* DMA trap event */
6440 			DRM_DEBUG("IH: DMA1 trap\n");
6441 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6442 			break;
6443 		default:
6444 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6445 			break;
6446 		}
6447 
6448 		/* wptr/rptr are in bytes! */
6449 		rptr += 16;
6450 		rptr &= rdev->ih.ptr_mask;
6451 		WREG32(IH_RB_RPTR, rptr);
6452 	}
6453 	if (queue_dp)
6454 		schedule_work(&rdev->dp_work);
6455 	if (queue_hotplug)
6456 		schedule_delayed_work(&rdev->hotplug_work, 0);
6457 	if (queue_thermal && rdev->pm.dpm_enabled)
6458 		schedule_work(&rdev->pm.dpm.thermal.work);
6459 	rdev->ih.rptr = rptr;
6460 	atomic_set(&rdev->ih.lock, 0);
6461 
6462 	/* make sure wptr hasn't changed while processing */
6463 	wptr = si_get_ih_wptr(rdev);
6464 	if (wptr != rptr)
6465 		goto restart_ih;
6466 
6467 	return IRQ_HANDLED;
6468 }
6469 
6470 /*
6471  * startup/shutdown callbacks
6472  */
si_uvd_init(struct radeon_device * rdev)6473 static void si_uvd_init(struct radeon_device *rdev)
6474 {
6475 	int r;
6476 
6477 	if (!rdev->has_uvd)
6478 		return;
6479 
6480 	r = radeon_uvd_init(rdev);
6481 	if (r) {
6482 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6483 		/*
6484 		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
6485 		 * to early fails uvd_v2_2_resume() and thus nothing happens
6486 		 * there. So it is pointless to try to go through that code
6487 		 * hence why we disable uvd here.
6488 		 */
6489 		rdev->has_uvd = false;
6490 		return;
6491 	}
6492 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6493 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6494 }
6495 
si_uvd_start(struct radeon_device * rdev)6496 static void si_uvd_start(struct radeon_device *rdev)
6497 {
6498 	int r;
6499 
6500 	if (!rdev->has_uvd)
6501 		return;
6502 
6503 	r = uvd_v2_2_resume(rdev);
6504 	if (r) {
6505 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6506 		goto error;
6507 	}
6508 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6509 	if (r) {
6510 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6511 		goto error;
6512 	}
6513 	return;
6514 
6515 error:
6516 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6517 }
6518 
si_uvd_resume(struct radeon_device * rdev)6519 static void si_uvd_resume(struct radeon_device *rdev)
6520 {
6521 	struct radeon_ring *ring;
6522 	int r;
6523 
6524 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6525 		return;
6526 
6527 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6528 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6529 	if (r) {
6530 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6531 		return;
6532 	}
6533 	r = uvd_v1_0_init(rdev);
6534 	if (r) {
6535 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6536 		return;
6537 	}
6538 }
6539 
si_vce_init(struct radeon_device * rdev)6540 static void si_vce_init(struct radeon_device *rdev)
6541 {
6542 	int r;
6543 
6544 	if (!rdev->has_vce)
6545 		return;
6546 
6547 	r = radeon_vce_init(rdev);
6548 	if (r) {
6549 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6550 		/*
6551 		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
6552 		 * to early fails si_vce_start() and thus nothing happens
6553 		 * there. So it is pointless to try to go through that code
6554 		 * hence why we disable vce here.
6555 		 */
6556 		rdev->has_vce = false;
6557 		return;
6558 	}
6559 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6560 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6561 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6562 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6563 }
6564 
si_vce_start(struct radeon_device * rdev)6565 static void si_vce_start(struct radeon_device *rdev)
6566 {
6567 	int r;
6568 
6569 	if (!rdev->has_vce)
6570 		return;
6571 
6572 	r = radeon_vce_resume(rdev);
6573 	if (r) {
6574 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6575 		goto error;
6576 	}
6577 	r = vce_v1_0_resume(rdev);
6578 	if (r) {
6579 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6580 		goto error;
6581 	}
6582 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6583 	if (r) {
6584 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6585 		goto error;
6586 	}
6587 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6588 	if (r) {
6589 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6590 		goto error;
6591 	}
6592 	return;
6593 
6594 error:
6595 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6596 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6597 }
6598 
si_vce_resume(struct radeon_device * rdev)6599 static void si_vce_resume(struct radeon_device *rdev)
6600 {
6601 	struct radeon_ring *ring;
6602 	int r;
6603 
6604 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6605 		return;
6606 
6607 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6608 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6609 	if (r) {
6610 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6611 		return;
6612 	}
6613 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6614 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6615 	if (r) {
6616 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6617 		return;
6618 	}
6619 	r = vce_v1_0_init(rdev);
6620 	if (r) {
6621 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6622 		return;
6623 	}
6624 }
6625 
si_startup(struct radeon_device * rdev)6626 static int si_startup(struct radeon_device *rdev)
6627 {
6628 	struct radeon_ring *ring;
6629 	int r;
6630 
6631 	/* enable pcie gen2/3 link */
6632 	si_pcie_gen3_enable(rdev);
6633 	/* enable aspm */
6634 	si_program_aspm(rdev);
6635 
6636 	/* scratch needs to be initialized before MC */
6637 	r = r600_vram_scratch_init(rdev);
6638 	if (r)
6639 		return r;
6640 
6641 	si_mc_program(rdev);
6642 
6643 	if (!rdev->pm.dpm_enabled) {
6644 		r = si_mc_load_microcode(rdev);
6645 		if (r) {
6646 			DRM_ERROR("Failed to load MC firmware!\n");
6647 			return r;
6648 		}
6649 	}
6650 
6651 	r = si_pcie_gart_enable(rdev);
6652 	if (r)
6653 		return r;
6654 	si_gpu_init(rdev);
6655 
6656 	/* allocate rlc buffers */
6657 	if (rdev->family == CHIP_VERDE) {
6658 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6659 		rdev->rlc.reg_list_size =
6660 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6661 	}
6662 	rdev->rlc.cs_data = si_cs_data;
6663 	r = sumo_rlc_init(rdev);
6664 	if (r) {
6665 		DRM_ERROR("Failed to init rlc BOs!\n");
6666 		return r;
6667 	}
6668 
6669 	/* allocate wb buffer */
6670 	r = radeon_wb_init(rdev);
6671 	if (r)
6672 		return r;
6673 
6674 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6675 	if (r) {
6676 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6677 		return r;
6678 	}
6679 
6680 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6681 	if (r) {
6682 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6683 		return r;
6684 	}
6685 
6686 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6687 	if (r) {
6688 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6689 		return r;
6690 	}
6691 
6692 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6693 	if (r) {
6694 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6695 		return r;
6696 	}
6697 
6698 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6699 	if (r) {
6700 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6701 		return r;
6702 	}
6703 
6704 	si_uvd_start(rdev);
6705 	si_vce_start(rdev);
6706 
6707 	/* Enable IRQ */
6708 	if (!rdev->irq.installed) {
6709 		r = radeon_irq_kms_init(rdev);
6710 		if (r)
6711 			return r;
6712 	}
6713 
6714 	r = si_irq_init(rdev);
6715 	if (r) {
6716 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
6717 		radeon_irq_kms_fini(rdev);
6718 		return r;
6719 	}
6720 	si_irq_set(rdev);
6721 
6722 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6723 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6724 			     RADEON_CP_PACKET2);
6725 	if (r)
6726 		return r;
6727 
6728 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6729 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6730 			     RADEON_CP_PACKET2);
6731 	if (r)
6732 		return r;
6733 
6734 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6735 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6736 			     RADEON_CP_PACKET2);
6737 	if (r)
6738 		return r;
6739 
6740 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6741 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6742 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6743 	if (r)
6744 		return r;
6745 
6746 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6747 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6748 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6749 	if (r)
6750 		return r;
6751 
6752 	r = si_cp_load_microcode(rdev);
6753 	if (r)
6754 		return r;
6755 	r = si_cp_resume(rdev);
6756 	if (r)
6757 		return r;
6758 
6759 	r = cayman_dma_resume(rdev);
6760 	if (r)
6761 		return r;
6762 
6763 	si_uvd_resume(rdev);
6764 	si_vce_resume(rdev);
6765 
6766 	r = radeon_ib_pool_init(rdev);
6767 	if (r) {
6768 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6769 		return r;
6770 	}
6771 
6772 	r = radeon_vm_manager_init(rdev);
6773 	if (r) {
6774 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6775 		return r;
6776 	}
6777 
6778 	r = radeon_audio_init(rdev);
6779 	if (r)
6780 		return r;
6781 
6782 	return 0;
6783 }
6784 
si_resume(struct radeon_device * rdev)6785 int si_resume(struct radeon_device *rdev)
6786 {
6787 	int r;
6788 
6789 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6790 	 * posting will perform necessary task to bring back GPU into good
6791 	 * shape.
6792 	 */
6793 	/* post card */
6794 	atom_asic_init(rdev->mode_info.atom_context);
6795 
6796 	/* init golden registers */
6797 	si_init_golden_registers(rdev);
6798 
6799 	if (rdev->pm.pm_method == PM_METHOD_DPM)
6800 		radeon_pm_resume(rdev);
6801 
6802 	rdev->accel_working = true;
6803 	r = si_startup(rdev);
6804 	if (r) {
6805 		DRM_ERROR("si startup failed on resume\n");
6806 		rdev->accel_working = false;
6807 		return r;
6808 	}
6809 
6810 	return r;
6811 
6812 }
6813 
si_suspend(struct radeon_device * rdev)6814 int si_suspend(struct radeon_device *rdev)
6815 {
6816 	radeon_pm_suspend(rdev);
6817 	radeon_audio_fini(rdev);
6818 	radeon_vm_manager_fini(rdev);
6819 	si_cp_enable(rdev, false);
6820 	cayman_dma_stop(rdev);
6821 	if (rdev->has_uvd) {
6822 		uvd_v1_0_fini(rdev);
6823 		radeon_uvd_suspend(rdev);
6824 	}
6825 	if (rdev->has_vce)
6826 		radeon_vce_suspend(rdev);
6827 	si_fini_pg(rdev);
6828 	si_fini_cg(rdev);
6829 	si_irq_suspend(rdev);
6830 	radeon_wb_disable(rdev);
6831 	si_pcie_gart_disable(rdev);
6832 	return 0;
6833 }
6834 
6835 /* Plan is to move initialization in that function and use
6836  * helper function so that radeon_device_init pretty much
6837  * do nothing more than calling asic specific function. This
6838  * should also allow to remove a bunch of callback function
6839  * like vram_info.
6840  */
si_init(struct radeon_device * rdev)6841 int si_init(struct radeon_device *rdev)
6842 {
6843 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6844 	int r;
6845 
6846 	/* Read BIOS */
6847 	if (!radeon_get_bios(rdev)) {
6848 		if (ASIC_IS_AVIVO(rdev))
6849 			return -EINVAL;
6850 	}
6851 	/* Must be an ATOMBIOS */
6852 	if (!rdev->is_atom_bios) {
6853 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6854 		return -EINVAL;
6855 	}
6856 	r = radeon_atombios_init(rdev);
6857 	if (r)
6858 		return r;
6859 
6860 	/* Post card if necessary */
6861 	if (!radeon_card_posted(rdev)) {
6862 		if (!rdev->bios) {
6863 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6864 			return -EINVAL;
6865 		}
6866 		DRM_INFO("GPU not posted. posting now...\n");
6867 		atom_asic_init(rdev->mode_info.atom_context);
6868 	}
6869 	/* init golden registers */
6870 	si_init_golden_registers(rdev);
6871 	/* Initialize scratch registers */
6872 	si_scratch_init(rdev);
6873 	/* Initialize surface registers */
6874 	radeon_surface_init(rdev);
6875 	/* Initialize clocks */
6876 	radeon_get_clock_info(rdev->ddev);
6877 
6878 	/* Fence driver */
6879 	r = radeon_fence_driver_init(rdev);
6880 	if (r)
6881 		return r;
6882 
6883 	/* initialize memory controller */
6884 	r = si_mc_init(rdev);
6885 	if (r)
6886 		return r;
6887 	/* Memory manager */
6888 	r = radeon_bo_init(rdev);
6889 	if (r)
6890 		return r;
6891 
6892 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6893 	    !rdev->rlc_fw || !rdev->mc_fw) {
6894 		r = si_init_microcode(rdev);
6895 		if (r) {
6896 			DRM_ERROR("Failed to load firmware!\n");
6897 			return r;
6898 		}
6899 	}
6900 
6901 	/* Initialize power management */
6902 	radeon_pm_init(rdev);
6903 
6904 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6905 	ring->ring_obj = NULL;
6906 	r600_ring_init(rdev, ring, 1024 * 1024);
6907 
6908 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6909 	ring->ring_obj = NULL;
6910 	r600_ring_init(rdev, ring, 1024 * 1024);
6911 
6912 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6913 	ring->ring_obj = NULL;
6914 	r600_ring_init(rdev, ring, 1024 * 1024);
6915 
6916 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6917 	ring->ring_obj = NULL;
6918 	r600_ring_init(rdev, ring, 64 * 1024);
6919 
6920 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6921 	ring->ring_obj = NULL;
6922 	r600_ring_init(rdev, ring, 64 * 1024);
6923 
6924 	si_uvd_init(rdev);
6925 	si_vce_init(rdev);
6926 
6927 	rdev->ih.ring_obj = NULL;
6928 	r600_ih_ring_init(rdev, 64 * 1024);
6929 
6930 	r = r600_pcie_gart_init(rdev);
6931 	if (r)
6932 		return r;
6933 
6934 	rdev->accel_working = true;
6935 	r = si_startup(rdev);
6936 	if (r) {
6937 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6938 		si_cp_fini(rdev);
6939 		cayman_dma_fini(rdev);
6940 		si_irq_fini(rdev);
6941 		sumo_rlc_fini(rdev);
6942 		radeon_wb_fini(rdev);
6943 		radeon_ib_pool_fini(rdev);
6944 		radeon_vm_manager_fini(rdev);
6945 		radeon_irq_kms_fini(rdev);
6946 		si_pcie_gart_fini(rdev);
6947 		rdev->accel_working = false;
6948 	}
6949 
6950 	/* Don't start up if the MC ucode is missing.
6951 	 * The default clocks and voltages before the MC ucode
6952 	 * is loaded are not suffient for advanced operations.
6953 	 */
6954 	if (!rdev->mc_fw) {
6955 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
6956 		return -EINVAL;
6957 	}
6958 
6959 	return 0;
6960 }
6961 
si_fini(struct radeon_device * rdev)6962 void si_fini(struct radeon_device *rdev)
6963 {
6964 	radeon_pm_fini(rdev);
6965 	si_cp_fini(rdev);
6966 	cayman_dma_fini(rdev);
6967 	si_fini_pg(rdev);
6968 	si_fini_cg(rdev);
6969 	si_irq_fini(rdev);
6970 	sumo_rlc_fini(rdev);
6971 	radeon_wb_fini(rdev);
6972 	radeon_vm_manager_fini(rdev);
6973 	radeon_ib_pool_fini(rdev);
6974 	radeon_irq_kms_fini(rdev);
6975 	if (rdev->has_uvd) {
6976 		uvd_v1_0_fini(rdev);
6977 		radeon_uvd_fini(rdev);
6978 	}
6979 	if (rdev->has_vce)
6980 		radeon_vce_fini(rdev);
6981 	si_pcie_gart_fini(rdev);
6982 	r600_vram_scratch_fini(rdev);
6983 	radeon_gem_fini(rdev);
6984 	radeon_fence_driver_fini(rdev);
6985 	radeon_bo_fini(rdev);
6986 	radeon_atombios_fini(rdev);
6987 	kfree(rdev->bios);
6988 	rdev->bios = NULL;
6989 }
6990 
6991 /**
6992  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6993  *
6994  * @rdev: radeon_device pointer
6995  *
6996  * Fetches a GPU clock counter snapshot (SI).
6997  * Returns the 64 bit clock counter snapshot.
6998  */
si_get_gpu_clock_counter(struct radeon_device * rdev)6999 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7000 {
7001 	uint64_t clock;
7002 
7003 	mutex_lock(&rdev->gpu_clock_mutex);
7004 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7005 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7006 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7007 	mutex_unlock(&rdev->gpu_clock_mutex);
7008 	return clock;
7009 }
7010 
si_set_uvd_clocks(struct radeon_device * rdev,u32 vclk,u32 dclk)7011 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7012 {
7013 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7014 	int r;
7015 
7016 	/* bypass vclk and dclk with bclk */
7017 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7018 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7019 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7020 
7021 	/* put PLL in bypass mode */
7022 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7023 
7024 	if (!vclk || !dclk) {
7025 		/* keep the Bypass mode */
7026 		return 0;
7027 	}
7028 
7029 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7030 					  16384, 0x03FFFFFF, 0, 128, 5,
7031 					  &fb_div, &vclk_div, &dclk_div);
7032 	if (r)
7033 		return r;
7034 
7035 	/* set RESET_ANTI_MUX to 0 */
7036 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7037 
7038 	/* set VCO_MODE to 1 */
7039 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7040 
7041 	/* disable sleep mode */
7042 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7043 
7044 	/* deassert UPLL_RESET */
7045 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7046 
7047 	mdelay(1);
7048 
7049 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7050 	if (r)
7051 		return r;
7052 
7053 	/* assert UPLL_RESET again */
7054 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7055 
7056 	/* disable spread spectrum. */
7057 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7058 
7059 	/* set feedback divider */
7060 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7061 
7062 	/* set ref divider to 0 */
7063 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7064 
7065 	if (fb_div < 307200)
7066 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7067 	else
7068 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7069 
7070 	/* set PDIV_A and PDIV_B */
7071 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7072 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7073 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7074 
7075 	/* give the PLL some time to settle */
7076 	mdelay(15);
7077 
7078 	/* deassert PLL_RESET */
7079 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7080 
7081 	mdelay(15);
7082 
7083 	/* switch from bypass mode to normal mode */
7084 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7085 
7086 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7087 	if (r)
7088 		return r;
7089 
7090 	/* switch VCLK and DCLK selection */
7091 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7092 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7093 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7094 
7095 	mdelay(100);
7096 
7097 	return 0;
7098 }
7099 
si_pcie_gen3_enable(struct radeon_device * rdev)7100 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7101 {
7102 	struct pci_dev *root = rdev->pdev->bus->self;
7103 	enum pci_bus_speed speed_cap;
7104 	u32 speed_cntl, current_data_rate;
7105 	int i;
7106 	u16 tmp16;
7107 
7108 	if (pci_is_root_bus(rdev->pdev->bus))
7109 		return;
7110 
7111 	if (radeon_pcie_gen2 == 0)
7112 		return;
7113 
7114 	if (rdev->flags & RADEON_IS_IGP)
7115 		return;
7116 
7117 	if (!(rdev->flags & RADEON_IS_PCIE))
7118 		return;
7119 
7120 	speed_cap = pcie_get_speed_cap(root);
7121 	if (speed_cap == PCI_SPEED_UNKNOWN)
7122 		return;
7123 
7124 	if ((speed_cap != PCIE_SPEED_8_0GT) &&
7125 	    (speed_cap != PCIE_SPEED_5_0GT))
7126 		return;
7127 
7128 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7129 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7130 		LC_CURRENT_DATA_RATE_SHIFT;
7131 	if (speed_cap == PCIE_SPEED_8_0GT) {
7132 		if (current_data_rate == 2) {
7133 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7134 			return;
7135 		}
7136 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7137 	} else if (speed_cap == PCIE_SPEED_5_0GT) {
7138 		if (current_data_rate == 1) {
7139 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7140 			return;
7141 		}
7142 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7143 	}
7144 
7145 	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
7146 		return;
7147 
7148 	if (speed_cap == PCIE_SPEED_8_0GT) {
7149 		/* re-try equalization if gen3 is not already enabled */
7150 		if (current_data_rate != 2) {
7151 			u16 bridge_cfg, gpu_cfg;
7152 			u16 bridge_cfg2, gpu_cfg2;
7153 			u32 max_lw, current_lw, tmp;
7154 
7155 			pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7156 						  &bridge_cfg);
7157 			pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
7158 						  &gpu_cfg);
7159 
7160 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7161 			pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
7162 
7163 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7164 			pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
7165 						   tmp16);
7166 
7167 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7168 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7169 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7170 
7171 			if (current_lw < max_lw) {
7172 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7173 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7174 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7175 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7176 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7177 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7178 				}
7179 			}
7180 
7181 			for (i = 0; i < 10; i++) {
7182 				/* check status */
7183 				pcie_capability_read_word(rdev->pdev,
7184 							  PCI_EXP_DEVSTA,
7185 							  &tmp16);
7186 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7187 					break;
7188 
7189 				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7190 							  &bridge_cfg);
7191 				pcie_capability_read_word(rdev->pdev,
7192 							  PCI_EXP_LNKCTL,
7193 							  &gpu_cfg);
7194 
7195 				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
7196 							  &bridge_cfg2);
7197 				pcie_capability_read_word(rdev->pdev,
7198 							  PCI_EXP_LNKCTL2,
7199 							  &gpu_cfg2);
7200 
7201 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7202 				tmp |= LC_SET_QUIESCE;
7203 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7204 
7205 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7206 				tmp |= LC_REDO_EQ;
7207 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7208 
7209 				msleep(100);
7210 
7211 				/* linkctl */
7212 				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7213 							  &tmp16);
7214 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7215 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7216 				pcie_capability_write_word(root,
7217 							   PCI_EXP_LNKCTL,
7218 							   tmp16);
7219 
7220 				pcie_capability_read_word(rdev->pdev,
7221 							  PCI_EXP_LNKCTL,
7222 							  &tmp16);
7223 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7224 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7225 				pcie_capability_write_word(rdev->pdev,
7226 							   PCI_EXP_LNKCTL,
7227 							   tmp16);
7228 
7229 				/* linkctl2 */
7230 				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
7231 							  &tmp16);
7232 				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
7233 					   PCI_EXP_LNKCTL2_TX_MARGIN);
7234 				tmp16 |= (bridge_cfg2 &
7235 					  (PCI_EXP_LNKCTL2_ENTER_COMP |
7236 					   PCI_EXP_LNKCTL2_TX_MARGIN));
7237 				pcie_capability_write_word(root,
7238 							   PCI_EXP_LNKCTL2,
7239 							   tmp16);
7240 
7241 				pcie_capability_read_word(rdev->pdev,
7242 							  PCI_EXP_LNKCTL2,
7243 							  &tmp16);
7244 				tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
7245 					   PCI_EXP_LNKCTL2_TX_MARGIN);
7246 				tmp16 |= (gpu_cfg2 &
7247 					  (PCI_EXP_LNKCTL2_ENTER_COMP |
7248 					   PCI_EXP_LNKCTL2_TX_MARGIN));
7249 				pcie_capability_write_word(rdev->pdev,
7250 							   PCI_EXP_LNKCTL2,
7251 							   tmp16);
7252 
7253 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7254 				tmp &= ~LC_SET_QUIESCE;
7255 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7256 			}
7257 		}
7258 	}
7259 
7260 	/* set the link speed */
7261 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7262 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7263 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7264 
7265 	pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
7266 	tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
7267 	if (speed_cap == PCIE_SPEED_8_0GT)
7268 		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
7269 	else if (speed_cap == PCIE_SPEED_5_0GT)
7270 		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
7271 	else
7272 		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
7273 	pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
7274 
7275 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7276 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7277 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7278 
7279 	for (i = 0; i < rdev->usec_timeout; i++) {
7280 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7281 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7282 			break;
7283 		udelay(1);
7284 	}
7285 }
7286 
si_program_aspm(struct radeon_device * rdev)7287 static void si_program_aspm(struct radeon_device *rdev)
7288 {
7289 	u32 data, orig;
7290 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7291 	bool disable_clkreq = false;
7292 
7293 	if (radeon_aspm == 0)
7294 		return;
7295 
7296 	if (!(rdev->flags & RADEON_IS_PCIE))
7297 		return;
7298 
7299 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7300 	data &= ~LC_XMIT_N_FTS_MASK;
7301 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7302 	if (orig != data)
7303 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7304 
7305 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7306 	data |= LC_GO_TO_RECOVERY;
7307 	if (orig != data)
7308 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7309 
7310 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7311 	data |= P_IGNORE_EDB_ERR;
7312 	if (orig != data)
7313 		WREG32_PCIE(PCIE_P_CNTL, data);
7314 
7315 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7316 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7317 	data |= LC_PMI_TO_L1_DIS;
7318 	if (!disable_l0s)
7319 		data |= LC_L0S_INACTIVITY(7);
7320 
7321 	if (!disable_l1) {
7322 		data |= LC_L1_INACTIVITY(7);
7323 		data &= ~LC_PMI_TO_L1_DIS;
7324 		if (orig != data)
7325 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7326 
7327 		if (!disable_plloff_in_l1) {
7328 			bool clk_req_support;
7329 
7330 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7331 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7332 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7333 			if (orig != data)
7334 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7335 
7336 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7337 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7338 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7339 			if (orig != data)
7340 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7341 
7342 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7343 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7344 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7345 			if (orig != data)
7346 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7347 
7348 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7349 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7350 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7351 			if (orig != data)
7352 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7353 
7354 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7355 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7356 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7357 				if (orig != data)
7358 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7359 
7360 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7361 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7362 				if (orig != data)
7363 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7364 
7365 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7366 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7367 				if (orig != data)
7368 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7369 
7370 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7371 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7372 				if (orig != data)
7373 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7374 
7375 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7376 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7377 				if (orig != data)
7378 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7379 
7380 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7381 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7382 				if (orig != data)
7383 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7384 
7385 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7386 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7387 				if (orig != data)
7388 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7389 
7390 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7391 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7392 				if (orig != data)
7393 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7394 			}
7395 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7396 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7397 			data |= LC_DYN_LANES_PWR_STATE(3);
7398 			if (orig != data)
7399 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7400 
7401 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7402 			data &= ~LS2_EXIT_TIME_MASK;
7403 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7404 				data |= LS2_EXIT_TIME(5);
7405 			if (orig != data)
7406 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7407 
7408 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7409 			data &= ~LS2_EXIT_TIME_MASK;
7410 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7411 				data |= LS2_EXIT_TIME(5);
7412 			if (orig != data)
7413 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7414 
7415 			if (!disable_clkreq &&
7416 			    !pci_is_root_bus(rdev->pdev->bus)) {
7417 #ifndef __NetBSD__		/* XXX radeon pcie */
7418 				struct pci_dev *root = rdev->pdev->bus->self;
7419 				u32 lnkcap;
7420 #endif
7421 
7422 				clk_req_support = false;
7423 #ifndef __NetBSD__		/* XXX radeon pcie */
7424 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7425 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7426 					clk_req_support = true;
7427 #endif
7428 			} else {
7429 				clk_req_support = false;
7430 			}
7431 
7432 			if (clk_req_support) {
7433 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7434 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7435 				if (orig != data)
7436 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7437 
7438 				orig = data = RREG32(THM_CLK_CNTL);
7439 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7440 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7441 				if (orig != data)
7442 					WREG32(THM_CLK_CNTL, data);
7443 
7444 				orig = data = RREG32(MISC_CLK_CNTL);
7445 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7446 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7447 				if (orig != data)
7448 					WREG32(MISC_CLK_CNTL, data);
7449 
7450 				orig = data = RREG32(CG_CLKPIN_CNTL);
7451 				data &= ~BCLK_AS_XCLK;
7452 				if (orig != data)
7453 					WREG32(CG_CLKPIN_CNTL, data);
7454 
7455 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7456 				data &= ~FORCE_BIF_REFCLK_EN;
7457 				if (orig != data)
7458 					WREG32(CG_CLKPIN_CNTL_2, data);
7459 
7460 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7461 				data &= ~MPLL_CLKOUT_SEL_MASK;
7462 				data |= MPLL_CLKOUT_SEL(4);
7463 				if (orig != data)
7464 					WREG32(MPLL_BYPASSCLK_SEL, data);
7465 
7466 				orig = data = RREG32(SPLL_CNTL_MODE);
7467 				data &= ~SPLL_REFCLK_SEL_MASK;
7468 				if (orig != data)
7469 					WREG32(SPLL_CNTL_MODE, data);
7470 			}
7471 		}
7472 	} else {
7473 		if (orig != data)
7474 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7475 	}
7476 
7477 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7478 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7479 	if (orig != data)
7480 		WREG32_PCIE(PCIE_CNTL2, data);
7481 
7482 	if (!disable_l0s) {
7483 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7484 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7485 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7486 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7487 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7488 				data &= ~LC_L0S_INACTIVITY_MASK;
7489 				if (orig != data)
7490 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7491 			}
7492 		}
7493 	}
7494 }
7495 
si_vce_send_vcepll_ctlreq(struct radeon_device * rdev)7496 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7497 {
7498 	unsigned i;
7499 
7500 	/* make sure VCEPLL_CTLREQ is deasserted */
7501 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7502 
7503 	mdelay(10);
7504 
7505 	/* assert UPLL_CTLREQ */
7506 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7507 
7508 	/* wait for CTLACK and CTLACK2 to get asserted */
7509 	for (i = 0; i < 100; ++i) {
7510 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7511 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7512 			break;
7513 		mdelay(10);
7514 	}
7515 
7516 	/* deassert UPLL_CTLREQ */
7517 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7518 
7519 	if (i == 100) {
7520 		DRM_ERROR("Timeout setting UVD clocks!\n");
7521 		return -ETIMEDOUT;
7522 	}
7523 
7524 	return 0;
7525 }
7526 
si_set_vce_clocks(struct radeon_device * rdev,u32 evclk,u32 ecclk)7527 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7528 {
7529 	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7530 	int r;
7531 
7532 	/* bypass evclk and ecclk with bclk */
7533 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7534 		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7535 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7536 
7537 	/* put PLL in bypass mode */
7538 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7539 		     ~VCEPLL_BYPASS_EN_MASK);
7540 
7541 	if (!evclk || !ecclk) {
7542 		/* keep the Bypass mode, put PLL to sleep */
7543 		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7544 			     ~VCEPLL_SLEEP_MASK);
7545 		return 0;
7546 	}
7547 
7548 	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7549 					  16384, 0x03FFFFFF, 0, 128, 5,
7550 					  &fb_div, &evclk_div, &ecclk_div);
7551 	if (r)
7552 		return r;
7553 
7554 	/* set RESET_ANTI_MUX to 0 */
7555 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7556 
7557 	/* set VCO_MODE to 1 */
7558 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7559 		     ~VCEPLL_VCO_MODE_MASK);
7560 
7561 	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7562 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7563 		     ~VCEPLL_SLEEP_MASK);
7564 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7565 
7566 	/* deassert VCEPLL_RESET */
7567 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7568 
7569 	mdelay(1);
7570 
7571 	r = si_vce_send_vcepll_ctlreq(rdev);
7572 	if (r)
7573 		return r;
7574 
7575 	/* assert VCEPLL_RESET again */
7576 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7577 
7578 	/* disable spread spectrum. */
7579 	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7580 
7581 	/* set feedback divider */
7582 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7583 
7584 	/* set ref divider to 0 */
7585 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7586 
7587 	/* set PDIV_A and PDIV_B */
7588 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7589 		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7590 		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7591 
7592 	/* give the PLL some time to settle */
7593 	mdelay(15);
7594 
7595 	/* deassert PLL_RESET */
7596 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7597 
7598 	mdelay(15);
7599 
7600 	/* switch from bypass mode to normal mode */
7601 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7602 
7603 	r = si_vce_send_vcepll_ctlreq(rdev);
7604 	if (r)
7605 		return r;
7606 
7607 	/* switch VCLK and DCLK selection */
7608 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7609 		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7610 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7611 
7612 	mdelay(100);
7613 
7614 	return 0;
7615 }
7616