/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include <drm/radeon_drm.h>
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"
#include "clearstate_si.h"
#include "radeon_ucode.h"


MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");

MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
MODULE_FIRMWARE("radeon/tahiti_me.bin");
MODULE_FIRMWARE("radeon/tahiti_ce.bin");
MODULE_FIRMWARE("radeon/tahiti_mc.bin");
MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
MODULE_FIRMWARE("radeon/tahiti_smc.bin");

MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");

MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
MODULE_FIRMWARE("radeon/pitcairn_me.bin");
MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");

MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");

MODULE_FIRMWARE("radeon/verde_pfp.bin");
MODULE_FIRMWARE("radeon/verde_me.bin");
MODULE_FIRMWARE("radeon/verde_ce.bin");
MODULE_FIRMWARE("radeon/verde_mc.bin");
MODULE_FIRMWARE("radeon/verde_rlc.bin");
MODULE_FIRMWARE("radeon/verde_smc.bin");
MODULE_FIRMWARE("radeon/verde_k_smc.bin");

MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");

MODULE_FIRMWARE("radeon/oland_pfp.bin");
MODULE_FIRMWARE("radeon/oland_me.bin");
MODULE_FIRMWARE("radeon/oland_ce.bin");
MODULE_FIRMWARE("radeon/oland_mc.bin");
MODULE_FIRMWARE("radeon/oland_rlc.bin");
MODULE_FIRMWARE("radeon/oland_smc.bin");
MODULE_FIRMWARE("radeon/oland_k_smc.bin");

MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

MODULE_FIRMWARE("radeon/hainan_pfp.bin");
MODULE_FIRMWARE("radeon/hainan_me.bin");
MODULE_FIRMWARE("radeon/hainan_ce.bin");
MODULE_FIRMWARE("radeon/hainan_mc.bin");
MODULE_FIRMWARE("radeon/hainan_rlc.bin");
MODULE_FIRMWARE("radeon/hainan_smc.bin");
MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");

MODULE_FIRMWARE("radeon/si58_mc.bin");

static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);

static const u32 crtc_offsets[] =
{
	EVERGREEN_CRTC0_REGISTER_OFFSET,
	EVERGREEN_CRTC1_REGISTER_OFFSET,
	EVERGREEN_CRTC2_REGISTER_OFFSET,
	EVERGREEN_CRTC3_REGISTER_OFFSET,
	EVERGREEN_CRTC4_REGISTER_OFFSET,
	EVERGREEN_CRTC5_REGISTER_OFFSET
};

static const u32 si_disp_int_status[] =
{
	DISP_INTERRUPT_STATUS,
	DISP_INTERRUPT_STATUS_CONTINUE,
	DISP_INTERRUPT_STATUS_CONTINUE2,
	DISP_INTERRUPT_STATUS_CONTINUE3,
	DISP_INTERRUPT_STATUS_CONTINUE4,
	DISP_INTERRUPT_STATUS_CONTINUE5
};

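/* the DC_HPD register blocks for pads 2-6 sit at consecutive 0xc-byte
 * strides after the HPD1 block, so the per-pad registers can simply be
 * indexed by pad number
 */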
#define DC_HPDx_CONTROL(x)        (DC_HPD1_CONTROL     + (x * 0xc))
#define DC_HPDx_INT_CONTROL(x)    (DC_HPD1_INT_CONTROL + (x * 0xc))
#define DC_HPDx_INT_STATUS_REG(x) (DC_HPD1_INT_STATUS  + (x * 0xc))

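/* list of registers the RLC saves/restores around gfx power gating.
 * Each entry appears to be a ((GRBM select) << 16 | dword register offset)
 * word followed by a slot for the saved value; this layout is inferred
 * from the RLC save/restore setup code, not from documentation.
 */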
static const u32 verde_rlc_save_restore_register_list[] =
{
	(0x8000 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x98f0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xe7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9150 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x897c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8d8c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac54 >> 2),
	0x00000000,
	0x3,
	(0x9c00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9910 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9914 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9918 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x991c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9920 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9924 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9928 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x992c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9930 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9934 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9938 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x993c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9940 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9944 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9948 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x994c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9950 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9954 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9958 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x995c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9960 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9964 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9968 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x996c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9970 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9974 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9978 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x997c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9980 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9984 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9988 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x998c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c08 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9060 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9364 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x913c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e50 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e58 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e5c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9508 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x950c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9494 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88cc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x89b0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9830 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9838 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9a10 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9874 >> 2),
	0x00000000,
	0x00000000
};

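/* the "golden" tables below are consumed three u32s at a time by
 * radeon_program_register_sequence(): {register offset, and-mask, value}.
 * An all-ones mask means the value is written as-is; anything else is a
 * read-modify-write of just the masked bits.
 */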
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};

static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};

static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};

static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};

static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};

static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};

static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

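/* power gating init sequence for Verde, applied through
 * radeon_program_register_sequence() in the same triple format as the
 * tables above
 */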
static u32 verde_pg_init[] =
{
	0x353c, 0xffffffff, 0x40000,
	0x3538, 0xffffffff, 0x200010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x7007,
	0x3538, 0xffffffff, 0x300010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x400000,
	0x3538, 0xffffffff, 0x100010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x120200,
	0x3538, 0xffffffff, 0x500010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x1e1e16,
	0x3538, 0xffffffff, 0x600010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x171f1e,
	0x3538, 0xffffffff, 0x700010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x3538, 0xffffffff, 0x9ff,
	0x3500, 0xffffffff, 0x0,
	0x3504, 0xffffffff, 0x10000800,
	0x3504, 0xffffffff, 0xf,
	0x3504, 0xffffffff, 0xf,
	0x3500, 0xffffffff, 0x4,
	0x3504, 0xffffffff, 0x1000051e,
	0x3504, 0xffffffff, 0xffff,
	0x3504, 0xffffffff, 0xffff,
	0x3500, 0xffffffff, 0x8,
	0x3504, 0xffffffff, 0x80500,
	0x3500, 0xffffffff, 0x12,
	0x3504, 0xffffffff, 0x9050c,
	0x3500, 0xffffffff, 0x1d,
	0x3504, 0xffffffff, 0xb052c,
	0x3500, 0xffffffff, 0x2a,
	0x3504, 0xffffffff, 0x1053e,
	0x3500, 0xffffffff, 0x2d,
	0x3504, 0xffffffff, 0x10546,
	0x3500, 0xffffffff, 0x30,
	0x3504, 0xffffffff, 0xa054e,
	0x3500, 0xffffffff, 0x3c,
	0x3504, 0xffffffff, 0x1055f,
	0x3500, 0xffffffff, 0x3f,
	0x3504, 0xffffffff, 0x10567,
	0x3500, 0xffffffff, 0x42,
	0x3504, 0xffffffff, 0x1056f,
	0x3500, 0xffffffff, 0x45,
	0x3504, 0xffffffff, 0x10572,
	0x3500, 0xffffffff, 0x48,
	0x3504, 0xffffffff, 0x20575,
	0x3500, 0xffffffff, 0x4c,
	0x3504, 0xffffffff, 0x190801,
	0x3500, 0xffffffff, 0x67,
	0x3504, 0xffffffff, 0x1082a,
	0x3500, 0xffffffff, 0x6a,
	0x3504, 0xffffffff, 0x1b082d,
	0x3500, 0xffffffff, 0x87,
	0x3504, 0xffffffff, 0x310851,
	0x3500, 0xffffffff, 0xba,
	0x3504, 0xffffffff, 0x891,
	0x3500, 0xffffffff, 0xbc,
	0x3504, 0xffffffff, 0x893,
	0x3500, 0xffffffff, 0xbe,
	0x3504, 0xffffffff, 0x20895,
	0x3500, 0xffffffff, 0xc2,
	0x3504, 0xffffffff, 0x20899,
	0x3500, 0xffffffff, 0xc6,
	0x3504, 0xffffffff, 0x2089d,
	0x3500, 0xffffffff, 0xca,
	0x3504, 0xffffffff, 0x8a1,
	0x3500, 0xffffffff, 0xcc,
	0x3504, 0xffffffff, 0x8a3,
	0x3500, 0xffffffff, 0xce,
	0x3504, 0xffffffff, 0x308a5,
	0x3500, 0xffffffff, 0xd3,
	0x3504, 0xffffffff, 0x6d08cd,
	0x3500, 0xffffffff, 0x142,
	0x3504, 0xffffffff, 0x2000095a,
	0x3504, 0xffffffff, 0x1,
	0x3500, 0xffffffff, 0x144,
	0x3504, 0xffffffff, 0x301f095b,
	0x3500, 0xffffffff, 0x165,
	0x3504, 0xffffffff, 0xc094d,
	0x3500, 0xffffffff, 0x173,
	0x3504, 0xffffffff, 0xf096d,
	0x3500, 0xffffffff, 0x184,
	0x3504, 0xffffffff, 0x15097f,
	0x3500, 0xffffffff, 0x19b,
	0x3504, 0xffffffff, 0xc0998,
	0x3500, 0xffffffff, 0x1a9,
	0x3504, 0xffffffff, 0x409a7,
	0x3500, 0xffffffff, 0x1af,
	0x3504, 0xffffffff, 0xcdc,
	0x3500, 0xffffffff, 0x1b1,
	0x3504, 0xffffffff, 0x800,
	0x3508, 0xffffffff, 0x6c9b2000,
	0x3510, 0xfc00, 0x2000,
	0x3544, 0xffffffff, 0xfc0,
	0x28d4, 0x00000100, 0x100
};

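/* apply the per-ASIC fixup tables above; ARRAY_SIZE() yields the number
 * of u32 entries, i.e. three times the number of registers programmed
 */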
static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}

/**
 * si_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 *
 */
int si_get_allowed_info_register(struct radeon_device *rdev,
				 u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
	case UVD_STATUS:
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

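/* these constants are in the driver's usual 10 kHz clock units, i.e.
 * PCIE_BUS_CLK is the 100 MHz PCIe reference and TCLK is a tenth of that
 * (an assumption based on the radeon clock conventions, e.g.
 * rdev->clock.spll.reference_freq below)
 */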
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)

/**
 * si_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (SI).
 */
u32 si_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;
	u32 tmp;

	tmp = RREG32(CG_CLKPIN_CNTL_2);
	if (tmp & MUX_TCLK_TO_XCLK)
		return TCLK;

	tmp = RREG32(CG_CLKPIN_CNTL);
	if (tmp & XTALIN_DIVIDE)
		return reference_clock / 4;

	return reference_clock;
}

/* get temperature in millidegrees */
int si_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

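	/* bit 9 of the CTF temp field appears to flag an out-of-range
	 * reading, so clamp to the 255 C maximum; otherwise the low nine
	 * bits are the temperature in degrees C
	 */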
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = (actual_temp * 1000);

	return actual_temp;
}

#define TAHITI_IO_MC_REGS_SIZE 36

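/* per-ASIC {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs that
 * si_mc_load_microcode() below programs ahead of loading the MC ucode
 */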
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};

static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};

static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};

static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};

static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};

/* ucode loading */
int si_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		ucode_size = rdev->mc_fw->datasize / 4;

		switch (rdev->family) {
		case CHIP_TAHITI:
			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_PITCAIRN:
			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_VERDE:
		default:
			io_mc_regs = (u32 *)&verde_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_OLAND:
			io_mc_regs = (u32 *)&oland_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAINAN:
			io_mc_regs = (u32 *)&hainan_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}
		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}

static int si_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	const char *new_chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
	size_t smc_req_size, mc2_req_size;
	char fw_name[30];
	int err;
	int new_fw = 0;
	bool new_smc = false;
	bool si58_fw = false;
	bool banks2_fw = false;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_TAHITI:
		chip_name = "TAHITI";
		new_chip_name = "tahiti";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_PITCAIRN:
		chip_name = "PITCAIRN";
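		/* these SKUs ship with a newer SMC and want the "_k" SMC
		 * firmware image (picked up when the SMC fw name is built
		 * later in this function)
		 */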
1689 		if ((rdev->pdev->revision == 0x81) &&
1690 		    ((rdev->pdev->device == 0x6810) ||
1691 		     (rdev->pdev->device == 0x6811)))
1692 			new_smc = true;
1693 		new_chip_name = "pitcairn";
1694 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1695 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1696 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1697 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1698 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1699 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1700 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1701 		break;
1702 	case CHIP_VERDE:
1703 		chip_name = "VERDE";
1704 		if (((rdev->pdev->device == 0x6820) &&
1705 		     ((rdev->pdev->revision == 0x81) ||
1706 		      (rdev->pdev->revision == 0x83))) ||
1707 		    ((rdev->pdev->device == 0x6821) &&
1708 		     ((rdev->pdev->revision == 0x83) ||
1709 		      (rdev->pdev->revision == 0x87))) ||
1710 		    ((rdev->pdev->revision == 0x87) &&
1711 		     ((rdev->pdev->device == 0x6823) ||
1712 		      (rdev->pdev->device == 0x682b))))
1713 			new_smc = true;
1714 		new_chip_name = "verde";
1715 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1716 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1717 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1718 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1719 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1720 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1721 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1722 		break;
1723 	case CHIP_OLAND:
1724 		chip_name = "OLAND";
1725 		if (((rdev->pdev->revision == 0x81) &&
1726 		     ((rdev->pdev->device == 0x6600) ||
1727 		      (rdev->pdev->device == 0x6604) ||
1728 		      (rdev->pdev->device == 0x6605) ||
1729 		      (rdev->pdev->device == 0x6610))) ||
1730 		    ((rdev->pdev->revision == 0x83) &&
1731 		     (rdev->pdev->device == 0x6610)))
1732 			new_smc = true;
1733 		new_chip_name = "oland";
1734 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1735 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1736 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1737 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1738 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1739 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1740 		break;
1741 	case CHIP_HAINAN:
1742 		chip_name = "HAINAN";
1743 		if (((rdev->pdev->revision == 0x81) &&
1744 		     (rdev->pdev->device == 0x6660)) ||
1745 		    ((rdev->pdev->revision == 0x83) &&
1746 		     ((rdev->pdev->device == 0x6660) ||
1747 		      (rdev->pdev->device == 0x6663) ||
1748 		      (rdev->pdev->device == 0x6665) ||
1749 		      (rdev->pdev->device == 0x6667))))
1750 			new_smc = true;
1751 		else if ((rdev->pdev->revision == 0xc3) &&
1752 			 (rdev->pdev->device == 0x6665))
1753 			banks2_fw = true;
1754 		new_chip_name = "hainan";
1755 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1756 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1757 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1758 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1759 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1760 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1761 		break;
1762 	default: BUG();
1763 	}
1764 
1765 	/* this memory configuration requires special firmware */
1766 	if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1767 		si58_fw = true;
1768 
1769 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1770 
1771 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
1772 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1773 	if (err) {
1774 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1775 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1776 		if (err)
1777 			goto out;
1778 		if (rdev->pfp_fw->datasize != pfp_req_size) {
1779 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1780 			       rdev->pfp_fw->datasize, fw_name);
1781 			err = -EINVAL;
1782 			goto out;
1783 		}
1784 	} else {
1785 		err = radeon_ucode_validate(rdev->pfp_fw);
1786 		if (err) {
1787 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1788 			       fw_name);
1789 			goto out;
1790 		} else {
1791 			new_fw++;
1792 		}
1793 	}
1794 
1795 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
1796 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1797 	if (err) {
1798 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
1799 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1800 		if (err)
1801 			goto out;
1802 		if (rdev->me_fw->datasize != me_req_size) {
1803 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1804 			       rdev->me_fw->datasize, fw_name);
1805 			err = -EINVAL;
1806 		}
1807 	} else {
1808 		err = radeon_ucode_validate(rdev->me_fw);
1809 		if (err) {
1810 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1811 			       fw_name);
1812 			goto out;
1813 		} else {
1814 			new_fw++;
1815 		}
1816 	}
1817 
1818 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
1819 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1820 	if (err) {
1821 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
1822 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1823 		if (err)
1824 			goto out;
1825 		if (rdev->ce_fw->datasize != ce_req_size) {
1826 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1827 			       rdev->ce_fw->datasize, fw_name);
1828 			err = -EINVAL;
1829 		}
1830 	} else {
1831 		err = radeon_ucode_validate(rdev->ce_fw);
1832 		if (err) {
1833 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1834 			       fw_name);
1835 			goto out;
1836 		} else {
1837 			new_fw++;
1838 		}
1839 	}
1840 
1841 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
1842 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1843 	if (err) {
1844 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
1845 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1846 		if (err)
1847 			goto out;
1848 		if (rdev->rlc_fw->datasize != rlc_req_size) {
1849 			pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1850 			       rdev->rlc_fw->datasize, fw_name);
1851 			err = -EINVAL;
1852 		}
1853 	} else {
1854 		err = radeon_ucode_validate(rdev->rlc_fw);
1855 		if (err) {
1856 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1857 			       fw_name);
1858 			goto out;
1859 		} else {
1860 			new_fw++;
1861 		}
1862 	}
1863 
1864 	if (si58_fw)
1865 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_si58_mc");
1866 	else
1867 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
1868 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1869 	if (err) {
1870 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
1871 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1872 		if (err) {
1873 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
1874 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1875 			if (err)
1876 				goto out;
1877 		}
1878 		if ((rdev->mc_fw->datasize != mc_req_size) &&
1879 		    (rdev->mc_fw->datasize != mc2_req_size)) {
1880 			pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1881 			       rdev->mc_fw->datasize, fw_name);
1882 			err = -EINVAL;
1883 		}
1884 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
1885 	} else {
1886 		err = radeon_ucode_validate(rdev->mc_fw);
1887 		if (err) {
1888 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1889 			       fw_name);
1890 			goto out;
1891 		} else {
1892 			new_fw++;
1893 		}
1894 	}
1895 
1896 	if (banks2_fw)
1897 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_banks_k_2_smc");
1898 	else if (new_smc)
1899 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_k_smc", new_chip_name);
1900 	else
1901 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
1902 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1903 	if (err) {
1904 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
1905 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1906 		if (err) {
1907 			pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1908 			release_firmware(rdev->smc_fw);
1909 			rdev->smc_fw = NULL;
1910 			err = 0;
1911 		} else if (rdev->smc_fw->datasize != smc_req_size) {
1912 			pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1913 			       rdev->smc_fw->datasize, fw_name);
1914 			err = -EINVAL;
1915 		}
1916 	} else {
1917 		err = radeon_ucode_validate(rdev->smc_fw);
1918 		if (err) {
1919 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1920 			       fw_name);
1921 			goto out;
1922 		} else {
1923 			new_fw++;
1924 		}
1925 	}
1926 
1927 	if (new_fw == 0) {
1928 		rdev->new_fw = false;
1929 	} else if (new_fw < 6) {
1930 		pr_err("si_fw: mixing new and old firmware!\n");
1931 		err = -EINVAL;
1932 	} else {
1933 		rdev->new_fw = true;
1934 	}
1935 out:
1936 	if (err) {
1937 		if (err != -EINVAL)
1938 			pr_err("si_cp: Failed to load firmware \"%s\"\n",
1939 			       fw_name);
1940 		release_firmware(rdev->pfp_fw);
1941 		rdev->pfp_fw = NULL;
1942 		release_firmware(rdev->me_fw);
1943 		rdev->me_fw = NULL;
1944 		release_firmware(rdev->ce_fw);
1945 		rdev->ce_fw = NULL;
1946 		release_firmware(rdev->rlc_fw);
1947 		rdev->rlc_fw = NULL;
1948 		release_firmware(rdev->mc_fw);
1949 		rdev->mc_fw = NULL;
1950 		release_firmware(rdev->smc_fw);
1951 		rdev->smc_fw = NULL;
1952 	}
1953 	return err;
1954 }
1955 
1956 /* watermark setup */
1957 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1958 				   struct radeon_crtc *radeon_crtc,
1959 				   struct drm_display_mode *mode,
1960 				   struct drm_display_mode *other_mode)
1961 {
1962 	u32 tmp, buffer_alloc, i;
1963 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1964 	/*
1965 	 * Line Buffer Setup
1966 	 * There are 3 line buffers, each one shared by 2 display controllers.
1967 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1968 	 * the display controllers.  The paritioning is done via one of four
1969 	 * the display controllers.  The partitioning is done via one of four
1970 	 *  0 - half lb
1971 	 *  2 - whole lb, other crtc must be disabled
1972 	 */
1973 	/* this can get tricky if we have two large displays on a paired group
1974 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1975 	 * non-linked crtcs for maximum line buffer allocation.
1976 	 */
1977 	if (radeon_crtc->base.enabled && mode) {
1978 		if (other_mode) {
1979 			tmp = 0; /* 1/2 */
1980 			buffer_alloc = 1;
1981 		} else {
1982 			tmp = 2; /* whole */
1983 			buffer_alloc = 2;
1984 		}
1985 	} else {
1986 		tmp = 0;
1987 		buffer_alloc = 0;
1988 	}
1989 
1990 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1991 	       DC_LB_MEMORY_CONFIG(tmp));
1992 
1993 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1994 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1995 	for (i = 0; i < rdev->usec_timeout; i++) {
1996 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1997 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1998 			break;
1999 		udelay(1);
2000 	}
2001 
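	/* Return the usable line buffer size in pixels for the watermark
	 * code: half a buffer covers 2 x 4096 pixels, the whole buffer
	 * 2 x 8192, matching the DC_LB_MEMORY_CONFIG value chosen above.
	 */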
2002 	if (radeon_crtc->base.enabled && mode) {
2003 		switch (tmp) {
2004 		case 0:
2005 		default:
2006 			return 4096 * 2;
2007 		case 2:
2008 			return 8192 * 2;
2009 		}
2010 	}
2011 
2012 	/* controller not enabled, so no lb used */
2013 	return 0;
2014 }
2015 
2016 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2017 {
2018 	u32 tmp = RREG32(MC_SHARED_CHMAP);
2019 
2020 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2021 	case 0:
2022 	default:
2023 		return 1;
2024 	case 1:
2025 		return 2;
2026 	case 2:
2027 		return 4;
2028 	case 3:
2029 		return 8;
2030 	case 4:
2031 		return 3;
2032 	case 5:
2033 		return 6;
2034 	case 6:
2035 		return 10;
2036 	case 7:
2037 		return 12;
2038 	case 8:
2039 		return 16;
2040 	}
2041 }
2042 
2043 struct dce6_wm_params {
2044 	u32 dram_channels; /* number of dram channels */
2045 	u32 yclk;          /* bandwidth per dram data pin in kHz */
2046 	u32 sclk;          /* engine clock in kHz */
2047 	u32 disp_clk;      /* display clock in kHz */
2048 	u32 src_width;     /* viewport width */
2049 	u32 active_time;   /* active display time in ns */
2050 	u32 blank_time;    /* blank time in ns */
2051 	bool interlaced;    /* mode is interlaced */
2052 	fixed20_12 vsc;    /* vertical scale ratio */
2053 	u32 num_heads;     /* number of active crtcs */
2054 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2055 	u32 lb_size;       /* line buffer allocated to pipe */
2056 	u32 vtaps;         /* vertical scaler taps */
2057 };
2058 
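/* The dce6_* bandwidth helpers below do their math in fixed20_12, the
 * 20.12 fixed point type from drm_fixed.h: dfixed_const(x) is x << 12,
 * dfixed_mul()/dfixed_div() keep the scale, and dfixed_trunc() drops the
 * fraction.  As a worked example, with yclk = 800000 (kHz) and 2 DRAM
 * channels, dce6_dram_bandwidth() yields (800000 / 1000) * (2 * 4) * 0.7
 * = 4480, i.e. raw DRAM bandwidth on the order of MB/s.
 */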
2059 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2060 {
2061 	/* Calculate raw DRAM Bandwidth */
2062 	fixed20_12 dram_efficiency; /* 0.7 */
2063 	fixed20_12 yclk, dram_channels, bandwidth;
2064 	fixed20_12 a;
2065 
2066 	a.full = dfixed_const(1000);
2067 	yclk.full = dfixed_const(wm->yclk);
2068 	yclk.full = dfixed_div(yclk, a);
2069 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2070 	a.full = dfixed_const(10);
2071 	dram_efficiency.full = dfixed_const(7);
2072 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2073 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2074 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2075 
2076 	return dfixed_trunc(bandwidth);
2077 }
2078 
2079 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2080 {
2081 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2082 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2083 	fixed20_12 yclk, dram_channels, bandwidth;
2084 	fixed20_12 a;
2085 
2086 	a.full = dfixed_const(1000);
2087 	yclk.full = dfixed_const(wm->yclk);
2088 	yclk.full = dfixed_div(yclk, a);
2089 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2090 	a.full = dfixed_const(10);
2091 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
2092 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2093 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2094 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2095 
2096 	return dfixed_trunc(bandwidth);
2097 }
2098 
2099 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2100 {
2101 	/* Calculate the display Data return Bandwidth */
2102 	fixed20_12 return_efficiency; /* 0.8 */
2103 	fixed20_12 sclk, bandwidth;
2104 	fixed20_12 a;
2105 
2106 	a.full = dfixed_const(1000);
2107 	sclk.full = dfixed_const(wm->sclk);
2108 	sclk.full = dfixed_div(sclk, a);
2109 	a.full = dfixed_const(10);
2110 	return_efficiency.full = dfixed_const(8);
2111 	return_efficiency.full = dfixed_div(return_efficiency, a);
2112 	a.full = dfixed_const(32);
2113 	bandwidth.full = dfixed_mul(a, sclk);
2114 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2115 
2116 	return dfixed_trunc(bandwidth);
2117 }
2118 
2119 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2120 {
2121 	return 32;
2122 }
2123 
2124 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2125 {
2126 	/* Calculate the DMIF Request Bandwidth */
2127 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2128 	fixed20_12 disp_clk, sclk, bandwidth;
2129 	fixed20_12 a, b1, b2;
2130 	u32 min_bandwidth;
2131 
2132 	a.full = dfixed_const(1000);
2133 	disp_clk.full = dfixed_const(wm->disp_clk);
2134 	disp_clk.full = dfixed_div(disp_clk, a);
2135 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2136 	b1.full = dfixed_mul(a, disp_clk);
2137 
2138 	a.full = dfixed_const(1000);
2139 	sclk.full = dfixed_const(wm->sclk);
2140 	sclk.full = dfixed_div(sclk, a);
2141 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2142 	b2.full = dfixed_mul(a, sclk);
2143 
2144 	a.full = dfixed_const(10);
2145 	disp_clk_request_efficiency.full = dfixed_const(8);
2146 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2147 
2148 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2149 
2150 	a.full = dfixed_const(min_bandwidth);
2151 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2152 
2153 	return dfixed_trunc(bandwidth);
2154 }
2155 
2156 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2157 {
2158 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
2159 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2160 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2161 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2162 
2163 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2164 }
2165 
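/* Average bandwidth works out to src_width * bytes_per_pixel * vsc
 * divided by the line time in us: e.g. a 1920 pixel wide 32bpp source
 * with vsc = 1 on a 16 us line is 1920 * 4 / 16 = 480 bytes/us (MB/s).
 */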
2166 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2167 {
2168 	/* Calculate the display mode Average Bandwidth
2169 	 * DisplayMode should contain the source and destination dimensions,
2170 	 * timing, etc.
2171 	 */
2172 	fixed20_12 bpp;
2173 	fixed20_12 line_time;
2174 	fixed20_12 src_width;
2175 	fixed20_12 bandwidth;
2176 	fixed20_12 a;
2177 
2178 	a.full = dfixed_const(1000);
2179 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2180 	line_time.full = dfixed_div(line_time, a);
2181 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2182 	src_width.full = dfixed_const(wm->src_width);
2183 	bandwidth.full = dfixed_mul(src_width, bpp);
2184 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2185 	bandwidth.full = dfixed_div(bandwidth, line_time);
2186 
2187 	return dfixed_trunc(bandwidth);
2188 }
2189 
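/* The latency watermark is the worst-case time in ns that the display
 * controller may need to bridge from the line buffer: the fixed MC
 * latency, plus chunk and cursor returns for the other heads, plus the
 * DC pipe latency, plus any shortfall when filling a line takes longer
 * than scanning it out.
 */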
2190 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2191 {
2192 	/* First calculate the latency in ns */
2193 	u32 mc_latency = 2000; /* 2000 ns. */
2194 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2195 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2196 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2197 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2198 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2199 		(wm->num_heads * cursor_line_pair_return_time);
2200 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2201 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2202 	u32 tmp, dmif_size = 12288;
2203 	fixed20_12 a, b, c;
2204 
2205 	if (wm->num_heads == 0)
2206 		return 0;
2207 
2208 	a.full = dfixed_const(2);
2209 	b.full = dfixed_const(1);
2210 	if ((wm->vsc.full > a.full) ||
2211 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2212 	    (wm->vtaps >= 5) ||
2213 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2214 		max_src_lines_per_dst_line = 4;
2215 	else
2216 		max_src_lines_per_dst_line = 2;
2217 
2218 	a.full = dfixed_const(available_bandwidth);
2219 	b.full = dfixed_const(wm->num_heads);
2220 	a.full = dfixed_div(a, b);
2221 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
2222 	tmp = min(dfixed_trunc(a), tmp);
2223 
2224 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
2225 
2226 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2227 	b.full = dfixed_const(1000);
2228 	c.full = dfixed_const(lb_fill_bw);
2229 	b.full = dfixed_div(c, b);
2230 	a.full = dfixed_div(a, b);
2231 	line_fill_time = dfixed_trunc(a);
2232 
2233 	if (line_fill_time < wm->active_time)
2234 		return latency;
2235 	else
2236 		return latency + (line_fill_time - wm->active_time);
2237 
2238 }
2239 
2240 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2241 {
2242 	if (dce6_average_bandwidth(wm) <=
2243 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2244 		return true;
2245 	else
2246 		return false;
2247 }
2248 
2249 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2250 {
2251 	if (dce6_average_bandwidth(wm) <=
2252 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2253 		return true;
2254 	else
2255 		return false;
2256 }
2257 
2258 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2259 {
2260 	u32 lb_partitions = wm->lb_size / wm->src_width;
2261 	u32 line_time = wm->active_time + wm->blank_time;
2262 	u32 latency_tolerant_lines;
2263 	u32 latency_hiding;
2264 	fixed20_12 a;
2265 
2266 	a.full = dfixed_const(1);
2267 	if (wm->vsc.full > a.full)
2268 		latency_tolerant_lines = 1;
2269 	else {
2270 		if (lb_partitions <= (wm->vtaps + 1))
2271 			latency_tolerant_lines = 1;
2272 		else
2273 			latency_tolerant_lines = 2;
2274 	}
2275 
2276 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2277 
2278 	if (dce6_latency_watermark(wm) <= latency_hiding)
2279 		return true;
2280 	else
2281 		return false;
2282 }
2283 
2284 static void dce6_program_watermarks(struct radeon_device *rdev,
2285 					 struct radeon_crtc *radeon_crtc,
2286 					 u32 lb_size, u32 num_heads)
2287 {
2288 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2289 	struct dce6_wm_params wm_low, wm_high;
2290 	u32 dram_channels;
2291 	u32 active_time;
2292 	u32 line_time = 0;
2293 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2294 	u32 priority_a_mark = 0, priority_b_mark = 0;
2295 	u32 priority_a_cnt = PRIORITY_OFF;
2296 	u32 priority_b_cnt = PRIORITY_OFF;
2297 	u32 tmp, arb_control3;
2298 	fixed20_12 a, b, c;
2299 
2300 	if (radeon_crtc->base.enabled && num_heads && mode) {
2301 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
2302 					    (u32)mode->clock);
2303 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
2304 					  (u32)mode->clock);
2305 		line_time = min(line_time, (u32)65535);
2306 		priority_a_cnt = 0;
2307 		priority_b_cnt = 0;
2308 
2309 		if (rdev->family == CHIP_ARUBA)
2310 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2311 		else
2312 			dram_channels = si_get_number_of_dram_channels(rdev);
2313 
2314 		/* watermark for high clocks */
2315 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2316 			wm_high.yclk =
2317 				radeon_dpm_get_mclk(rdev, false) * 10;
2318 			wm_high.sclk =
2319 				radeon_dpm_get_sclk(rdev, false) * 10;
2320 		} else {
2321 			wm_high.yclk = rdev->pm.current_mclk * 10;
2322 			wm_high.sclk = rdev->pm.current_sclk * 10;
2323 		}
2324 
2325 		wm_high.disp_clk = mode->clock;
2326 		wm_high.src_width = mode->crtc_hdisplay;
2327 		wm_high.active_time = active_time;
2328 		wm_high.blank_time = line_time - wm_high.active_time;
2329 		wm_high.interlaced = false;
2330 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2331 			wm_high.interlaced = true;
2332 		wm_high.vsc = radeon_crtc->vsc;
2333 		wm_high.vtaps = 1;
2334 		if (radeon_crtc->rmx_type != RMX_OFF)
2335 			wm_high.vtaps = 2;
2336 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2337 		wm_high.lb_size = lb_size;
2338 		wm_high.dram_channels = dram_channels;
2339 		wm_high.num_heads = num_heads;
2340 
2341 		/* watermark for low clocks */
2342 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2343 			wm_low.yclk =
2344 				radeon_dpm_get_mclk(rdev, true) * 10;
2345 			wm_low.sclk =
2346 				radeon_dpm_get_sclk(rdev, true) * 10;
2347 		} else {
2348 			wm_low.yclk = rdev->pm.current_mclk * 10;
2349 			wm_low.sclk = rdev->pm.current_sclk * 10;
2350 		}
2351 
2352 		wm_low.disp_clk = mode->clock;
2353 		wm_low.src_width = mode->crtc_hdisplay;
2354 		wm_low.active_time = active_time;
2355 		wm_low.blank_time = line_time - wm_low.active_time;
2356 		wm_low.interlaced = false;
2357 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2358 			wm_low.interlaced = true;
2359 		wm_low.vsc = radeon_crtc->vsc;
2360 		wm_low.vtaps = 1;
2361 		if (radeon_crtc->rmx_type != RMX_OFF)
2362 			wm_low.vtaps = 2;
2363 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2364 		wm_low.lb_size = lb_size;
2365 		wm_low.dram_channels = dram_channels;
2366 		wm_low.num_heads = num_heads;
2367 
2368 		/* set for high clocks */
2369 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2370 		/* set for low clocks */
2371 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2372 
2373 		/* possibly force display priority to high */
2374 		/* should really do this at mode validation time... */
2375 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2376 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2377 		    !dce6_check_latency_hiding(&wm_high) ||
2378 		    (rdev->disp_priority == 2)) {
2379 			DRM_DEBUG_KMS("force priority to high\n");
2380 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2381 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2382 		}
2383 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2384 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2385 		    !dce6_check_latency_hiding(&wm_low) ||
2386 		    (rdev->disp_priority == 2)) {
2387 			DRM_DEBUG_KMS("force priority to high\n");
2388 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2389 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2390 		}
2391 
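		/* The priority mark is the latency watermark converted to
		 * pixels: latency (ns) * pixel clock (mode->clock is in
		 * kHz, so /1000 gives pixels per us) * hsc / 1000,
		 * quantized to 16-pixel units.
		 */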
2392 		a.full = dfixed_const(1000);
2393 		b.full = dfixed_const(mode->clock);
2394 		b.full = dfixed_div(b, a);
2395 		c.full = dfixed_const(latency_watermark_a);
2396 		c.full = dfixed_mul(c, b);
2397 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2398 		c.full = dfixed_div(c, a);
2399 		a.full = dfixed_const(16);
2400 		c.full = dfixed_div(c, a);
2401 		priority_a_mark = dfixed_trunc(c);
2402 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2403 
2404 		a.full = dfixed_const(1000);
2405 		b.full = dfixed_const(mode->clock);
2406 		b.full = dfixed_div(b, a);
2407 		c.full = dfixed_const(latency_watermark_b);
2408 		c.full = dfixed_mul(c, b);
2409 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2410 		c.full = dfixed_div(c, a);
2411 		a.full = dfixed_const(16);
2412 		c.full = dfixed_div(c, a);
2413 		priority_b_mark = dfixed_trunc(c);
2414 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2415 
2416 		/* Save number of lines the linebuffer leads before the scanout */
2417 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2418 	}
2419 
2420 	/* select wm A */
2421 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2422 	tmp = arb_control3;
2423 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2424 	tmp |= LATENCY_WATERMARK_MASK(1);
2425 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2426 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2427 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2428 		LATENCY_HIGH_WATERMARK(line_time)));
2429 	/* select wm B */
2430 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2431 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2432 	tmp |= LATENCY_WATERMARK_MASK(2);
2433 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2434 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2435 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2436 		LATENCY_HIGH_WATERMARK(line_time)));
2437 	/* restore original selection */
2438 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2439 
2440 	/* write the priority marks */
2441 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2442 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2443 
2444 	/* save values for DPM */
2445 	radeon_crtc->line_time = line_time;
2446 	radeon_crtc->wm_high = latency_watermark_a;
2447 	radeon_crtc->wm_low = latency_watermark_b;
2448 }
2449 
2450 void dce6_bandwidth_update(struct radeon_device *rdev)
2451 {
2452 	struct drm_display_mode *mode0 = NULL;
2453 	struct drm_display_mode *mode1 = NULL;
2454 	u32 num_heads = 0, lb_size;
2455 	int i;
2456 
2457 	if (!rdev->mode_info.mode_config_initialized)
2458 		return;
2459 
2460 	radeon_update_display_priority(rdev);
2461 
2462 	for (i = 0; i < rdev->num_crtc; i++) {
2463 		if (rdev->mode_info.crtcs[i]->base.enabled)
2464 			num_heads++;
2465 	}
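	/* crtcs are paired on a shared line buffer, so walk them two at a
	 * time and hand each crtc its partner's mode so the line buffer
	 * split can be chosen per pair.
	 */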
2466 	for (i = 0; i < rdev->num_crtc; i += 2) {
2467 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2468 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2469 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2470 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2471 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2472 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2473 	}
2474 }
2475 
2476 /*
2477  * Core functions
2478  */
2479 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2480 {
2481 	u32 *tile = rdev->config.si.tile_mode_array;
2482 	const u32 num_tile_mode_states =
2483 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2484 	u32 reg_offset, split_equal_to_row_size;
2485 
2486 	switch (rdev->config.si.mem_row_size_in_kb) {
2487 	case 1:
2488 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2489 		break;
2490 	case 2:
2491 	default:
2492 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2493 		break;
2494 	case 4:
2495 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2496 		break;
2497 	}
2498 
2499 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2500 		tile[reg_offset] = 0;
2501 
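	/* All entries were zeroed above, so any index a family below leaves
	 * unset (18-20 here) is programmed as 0 when the array is streamed
	 * into the GB_TILE_MODEn registers.
	 */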
2502 	switch (rdev->family) {
2503 	case CHIP_TAHITI:
2504 	case CHIP_PITCAIRN:
2505 		/* non-AA compressed depth or any compressed stencil */
2506 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2507 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2508 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2509 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2510 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2511 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2513 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2514 		/* 2xAA/4xAA compressed depth only */
2515 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2517 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2518 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2519 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2520 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2523 		/* 8xAA compressed depth only */
2524 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2526 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2527 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2528 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2529 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2531 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2532 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2533 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2534 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2535 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2536 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2537 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2538 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2541 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2542 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2543 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2544 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2545 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2546 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2547 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2549 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2550 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2551 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2553 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2554 			   TILE_SPLIT(split_equal_to_row_size) |
2555 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2556 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2558 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2559 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2560 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2562 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2563 			   TILE_SPLIT(split_equal_to_row_size) |
2564 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2565 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2567 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2568 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2569 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2570 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2571 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2572 			   TILE_SPLIT(split_equal_to_row_size) |
2573 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2574 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2576 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2577 		/* 1D and 1D Array Surfaces */
2578 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2579 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2580 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2581 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2582 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2583 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2585 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2586 		/* Displayable maps. */
2587 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2588 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2589 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2590 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2591 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2592 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2594 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2595 		/* Display 8bpp. */
2596 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2597 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2598 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2599 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2600 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2601 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2602 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2603 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2604 		/* Display 16bpp. */
2605 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2607 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2608 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2609 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2610 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2612 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2613 		/* Display 32bpp. */
2614 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2615 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2616 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2617 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2618 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2619 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2622 		/* Thin. */
2623 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2624 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2625 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2627 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2628 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2630 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2631 		/* Thin 8 bpp. */
2632 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2633 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2634 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2635 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2636 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2637 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2639 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2640 		/* Thin 16 bpp. */
2641 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2643 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2644 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2645 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2646 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2648 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2649 		/* Thin 32 bpp. */
2650 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2651 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2652 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2653 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2654 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2655 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2658 		/* Thin 64 bpp. */
2659 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2661 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662 			   TILE_SPLIT(split_equal_to_row_size) |
2663 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2664 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2666 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2667 		/* 8 bpp PRT. */
2668 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2670 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2671 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2672 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2673 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2674 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2675 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2676 		/* 16 bpp PRT */
2677 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2679 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2680 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2681 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2682 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2683 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2684 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2685 		/* 32 bpp PRT */
2686 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2687 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2688 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2689 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2690 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2691 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2693 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2694 		/* 64 bpp PRT */
2695 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2697 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2698 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2699 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2700 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2701 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2702 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2703 		/* 128 bpp PRT */
2704 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2706 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2707 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2708 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2709 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2711 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2712 
2713 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2714 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2715 		break;
2716 
2717 	case CHIP_VERDE:
2718 	case CHIP_OLAND:
2719 	case CHIP_HAINAN:
2720 		/* non-AA compressed depth or any compressed stencil */
2721 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2722 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2723 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2724 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2725 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2726 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2729 		/* 2xAA/4xAA compressed depth only */
2730 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2731 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2732 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2733 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2734 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2735 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2737 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2738 		/* 8xAA compressed depth only */
2739 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2741 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2742 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2743 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2744 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2746 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2747 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2748 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2750 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2751 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2752 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2753 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2754 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2755 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2756 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2757 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2759 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2760 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2761 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2762 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2764 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2765 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2766 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2767 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2768 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2769 			   TILE_SPLIT(split_equal_to_row_size) |
2770 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2771 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2772 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2773 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2774 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2775 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2777 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778 			   TILE_SPLIT(split_equal_to_row_size) |
2779 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2780 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2782 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2783 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2784 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2786 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787 			   TILE_SPLIT(split_equal_to_row_size) |
2788 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2789 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2791 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2792 		/* 1D and 1D Array Surfaces */
2793 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2794 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2795 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2796 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2797 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2798 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2800 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2801 		/* Displayable maps. */
2802 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2803 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2804 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2805 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2806 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2807 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2808 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2809 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2810 		/* Display 8bpp. */
2811 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2812 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2813 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2815 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2816 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2818 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2819 		/* Display 16bpp. */
2820 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2821 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2822 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2823 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2824 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2825 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2827 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2828 		/* Display 32bpp. */
2829 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2830 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2831 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2832 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2833 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2834 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2836 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2837 		/* Thin. */
2838 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2839 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2840 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2841 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2842 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2843 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2845 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2846 		/* Thin 8 bpp. */
2847 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2848 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2849 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2850 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2851 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2852 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2854 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2855 		/* Thin 16 bpp. */
2856 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2857 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2858 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2859 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2860 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2861 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2863 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2864 		/* Thin 32 bpp. */
2865 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2866 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2867 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2868 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2869 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2870 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2872 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2873 		/* Thin 64 bpp. */
2874 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2875 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2876 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2877 			   TILE_SPLIT(split_equal_to_row_size) |
2878 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2879 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2880 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2881 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2882 		/* 8 bpp PRT. */
2883 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2884 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2885 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2886 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2887 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2888 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2889 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2890 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2891 		/* 16 bpp PRT */
2892 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2894 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2895 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2896 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2897 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2898 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2899 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2900 		/* 32 bpp PRT */
2901 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2903 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2904 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2905 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2906 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2907 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2908 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2909 		/* 64 bpp PRT */
2910 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2912 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2913 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2914 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2915 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2916 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2917 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2918 		/* 128 bpp PRT */
2919 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2921 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2922 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2923 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2924 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2925 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2926 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2927 
2928 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2929 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2930 		break;
2931 
2932 	default:
2933 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2934 	}
2935 }
2936 
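/* Steer subsequent register accesses at a single shader engine / shader
 * array via GRBM_GFX_INDEX; 0xffffffff for either argument selects
 * broadcast to all instances.
 */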
2937 static void si_select_se_sh(struct radeon_device *rdev,
2938 			    u32 se_num, u32 sh_num)
2939 {
2940 	u32 data = INSTANCE_BROADCAST_WRITES;
2941 
2942 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2943 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2944 	else if (se_num == 0xffffffff)
2945 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2946 	else if (sh_num == 0xffffffff)
2947 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2948 	else
2949 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2950 	WREG32(GRBM_GFX_INDEX, data);
2951 }
2952 
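/* Build a mask of the bit_width low bits; for bit_width < 32 this is
 * equivalent to (1u << bit_width) - 1.
 */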
2953 static u32 si_create_bitmask(u32 bit_width)
2954 {
2955 	u32 i, mask = 0;
2956 
2957 	for (i = 0; i < bit_width; i++) {
2958 		mask <<= 1;
2959 		mask |= 1;
2960 	}
2961 	return mask;
2962 }
2963 
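/* Derive the active CU mask for the currently selected SH: the harvest
 * bits in CC_GC_SHADER_ARRAY_CONFIG only count when bit 0 marks them
 * valid, the user-disabled CUs are OR'd on top, and the inverted result
 * is clipped to cu_per_sh bits.
 */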
2964 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2965 {
2966 	u32 data, mask;
2967 
2968 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2969 	if (data & 1)
2970 		data &= INACTIVE_CUS_MASK;
2971 	else
2972 		data = 0;
2973 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2974 
2975 	data >>= INACTIVE_CUS_SHIFT;
2976 
2977 	mask = si_create_bitmask(cu_per_sh);
2978 
2979 	return ~data & mask;
2980 }
2981 
2982 static void si_setup_spi(struct radeon_device *rdev,
2983 			 u32 se_num, u32 sh_per_se,
2984 			 u32 cu_per_sh)
2985 {
2986 	int i, j, k;
2987 	u32 data, mask, active_cu;
2988 
2989 	for (i = 0; i < se_num; i++) {
2990 		for (j = 0; j < sh_per_se; j++) {
2991 			si_select_se_sh(rdev, i, j);
2992 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2993 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2994 
2995 			mask = 1;
2996 			for (k = 0; k < 16; k++) {
2997 				mask <<= k;
2998 				if (active_cu & mask) {
2999 					data &= ~mask;
3000 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3001 					break;
3002 				}
3003 			}
3004 		}
3005 	}
3006 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3007 }
3008 
3009 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3010 			      u32 max_rb_num_per_se,
3011 			      u32 sh_per_se)
3012 {
3013 	u32 data, mask;
3014 
3015 	data = RREG32(CC_RB_BACKEND_DISABLE);
3016 	if (data & 1)
3017 		data &= BACKEND_DISABLE_MASK;
3018 	else
3019 		data = 0;
3020 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3021 
3022 	data >>= BACKEND_DISABLE_SHIFT;
3023 
3024 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3025 
3026 	return data & mask;
3027 }
3028 
3029 static void si_setup_rb(struct radeon_device *rdev,
3030 			u32 se_num, u32 sh_per_se,
3031 			u32 max_rb_num_per_se)
3032 {
3033 	int i, j;
3034 	u32 data, mask;
3035 	u32 disabled_rbs = 0;
3036 	u32 enabled_rbs = 0;
3037 
3038 	for (i = 0; i < se_num; i++) {
3039 		for (j = 0; j < sh_per_se; j++) {
3040 			si_select_se_sh(rdev, i, j);
3041 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3042 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3043 		}
3044 	}
3045 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3046 
3047 	mask = 1;
3048 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3049 		if (!(disabled_rbs & mask))
3050 			enabled_rbs |= mask;
3051 		mask <<= 1;
3052 	}
3053 
3054 	rdev->config.si.backend_enable_mask = enabled_rbs;
3055 
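	/* Consume enabled_rbs two bits per SH and map each pair onto a
	 * RASTER_CONFIG RB_MAP value so rasterization only targets live
	 * backends.
	 */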
3056 	for (i = 0; i < se_num; i++) {
3057 		si_select_se_sh(rdev, i, 0xffffffff);
3058 		data = 0;
3059 		for (j = 0; j < sh_per_se; j++) {
3060 			switch (enabled_rbs & 3) {
3061 			case 1:
3062 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3063 				break;
3064 			case 2:
3065 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3066 				break;
3067 			case 3:
3068 			default:
3069 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3070 				break;
3071 			}
3072 			enabled_rbs >>= 2;
3073 		}
3074 		WREG32(PA_SC_RASTER_CONFIG, data);
3075 	}
3076 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3077 }
3078 
3079 static void si_gpu_init(struct radeon_device *rdev)
3080 {
3081 	u32 gb_addr_config = 0;
3082 	u32 mc_shared_chmap, mc_arb_ramcfg;
3083 	u32 sx_debug_1;
3084 	u32 hdp_host_path_cntl;
3085 	u32 tmp;
3086 	int i, j;
3087 
3088 	switch (rdev->family) {
3089 	case CHIP_TAHITI:
3090 		rdev->config.si.max_shader_engines = 2;
3091 		rdev->config.si.max_tile_pipes = 12;
3092 		rdev->config.si.max_cu_per_sh = 8;
3093 		rdev->config.si.max_sh_per_se = 2;
3094 		rdev->config.si.max_backends_per_se = 4;
3095 		rdev->config.si.max_texture_channel_caches = 12;
3096 		rdev->config.si.max_gprs = 256;
3097 		rdev->config.si.max_gs_threads = 32;
3098 		rdev->config.si.max_hw_contexts = 8;
3099 
3100 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3101 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3102 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3103 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3104 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3105 		break;
3106 	case CHIP_PITCAIRN:
3107 		rdev->config.si.max_shader_engines = 2;
3108 		rdev->config.si.max_tile_pipes = 8;
3109 		rdev->config.si.max_cu_per_sh = 5;
3110 		rdev->config.si.max_sh_per_se = 2;
3111 		rdev->config.si.max_backends_per_se = 4;
3112 		rdev->config.si.max_texture_channel_caches = 8;
3113 		rdev->config.si.max_gprs = 256;
3114 		rdev->config.si.max_gs_threads = 32;
3115 		rdev->config.si.max_hw_contexts = 8;
3116 
3117 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3118 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3119 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3120 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3121 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3122 		break;
3123 	case CHIP_VERDE:
3124 	default:
3125 		rdev->config.si.max_shader_engines = 1;
3126 		rdev->config.si.max_tile_pipes = 4;
3127 		rdev->config.si.max_cu_per_sh = 5;
3128 		rdev->config.si.max_sh_per_se = 2;
3129 		rdev->config.si.max_backends_per_se = 4;
3130 		rdev->config.si.max_texture_channel_caches = 4;
3131 		rdev->config.si.max_gprs = 256;
3132 		rdev->config.si.max_gs_threads = 32;
3133 		rdev->config.si.max_hw_contexts = 8;
3134 
3135 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3136 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3137 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3138 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3139 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3140 		break;
3141 	case CHIP_OLAND:
3142 		rdev->config.si.max_shader_engines = 1;
3143 		rdev->config.si.max_tile_pipes = 4;
3144 		rdev->config.si.max_cu_per_sh = 6;
3145 		rdev->config.si.max_sh_per_se = 1;
3146 		rdev->config.si.max_backends_per_se = 2;
3147 		rdev->config.si.max_texture_channel_caches = 4;
3148 		rdev->config.si.max_gprs = 256;
3149 		rdev->config.si.max_gs_threads = 16;
3150 		rdev->config.si.max_hw_contexts = 8;
3151 
3152 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3153 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3154 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3155 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3156 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3157 		break;
3158 	case CHIP_HAINAN:
3159 		rdev->config.si.max_shader_engines = 1;
3160 		rdev->config.si.max_tile_pipes = 4;
3161 		rdev->config.si.max_cu_per_sh = 5;
3162 		rdev->config.si.max_sh_per_se = 1;
3163 		rdev->config.si.max_backends_per_se = 1;
3164 		rdev->config.si.max_texture_channel_caches = 2;
3165 		rdev->config.si.max_gprs = 256;
3166 		rdev->config.si.max_gs_threads = 16;
3167 		rdev->config.si.max_hw_contexts = 8;
3168 
3169 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3170 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3171 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3172 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3173 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3174 		break;
3175 	}
3176 
3177 	/* Initialize HDP */
3178 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3179 		WREG32((0x2c14 + j), 0x00000000);
3180 		WREG32((0x2c18 + j), 0x00000000);
3181 		WREG32((0x2c1c + j), 0x00000000);
3182 		WREG32((0x2c20 + j), 0x00000000);
3183 		WREG32((0x2c24 + j), 0x00000000);
3184 	}
3185 
3186 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3187 	WREG32(SRBM_INT_CNTL, 1);
3188 	WREG32(SRBM_INT_ACK, 1);
3189 
3190 	evergreen_fix_pci_max_read_req_size(rdev);
3191 
3192 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3193 
3194 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3195 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3196 
3197 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3198 	rdev->config.si.mem_max_burst_length_bytes = 256;
3199 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3200 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3201 	if (rdev->config.si.mem_row_size_in_kb > 4)
3202 		rdev->config.si.mem_row_size_in_kb = 4;
3203 	/* XXX use MC settings? */
3204 	rdev->config.si.shader_engine_tile_size = 32;
3205 	rdev->config.si.num_gpus = 1;
3206 	rdev->config.si.multi_gpu_tile_size = 64;
3207 
3208 	/* fix up row size */
3209 	gb_addr_config &= ~ROW_SIZE_MASK;
3210 	switch (rdev->config.si.mem_row_size_in_kb) {
3211 	case 1:
3212 	default:
3213 		gb_addr_config |= ROW_SIZE(0);
3214 		break;
3215 	case 2:
3216 		gb_addr_config |= ROW_SIZE(1);
3217 		break;
3218 	case 4:
3219 		gb_addr_config |= ROW_SIZE(2);
3220 		break;
3221 	}
3222 
3223 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3224 	 * not have bank info, so create a custom tiling dword.
3225 	 * bits 3:0   num_pipes
3226 	 * bits 7:4   num_banks
3227 	 * bits 11:8  group_size
3228 	 * bits 15:12 row_size
3229 	 */
3230 	rdev->config.si.tile_config = 0;
3231 	switch (rdev->config.si.num_tile_pipes) {
3232 	case 1:
3233 		rdev->config.si.tile_config |= (0 << 0);
3234 		break;
3235 	case 2:
3236 		rdev->config.si.tile_config |= (1 << 0);
3237 		break;
3238 	case 4:
3239 		rdev->config.si.tile_config |= (2 << 0);
3240 		break;
3241 	case 8:
3242 	default:
3243 		/* XXX what about 12? */
3244 		rdev->config.si.tile_config |= (3 << 0);
3245 		break;
3246 	}
3247 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3248 	case 0: /* four banks */
3249 		rdev->config.si.tile_config |= 0 << 4;
3250 		break;
3251 	case 1: /* eight banks */
3252 		rdev->config.si.tile_config |= 1 << 4;
3253 		break;
3254 	case 2: /* sixteen banks */
3255 	default:
3256 		rdev->config.si.tile_config |= 2 << 4;
3257 		break;
3258 	}
3259 	rdev->config.si.tile_config |=
3260 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3261 	rdev->config.si.tile_config |=
3262 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3263 
3264 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3265 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3266 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3267 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3268 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3269 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3270 	if (rdev->has_uvd) {
3271 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3272 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3273 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3274 	}
3275 
3276 	si_tiling_mode_table_init(rdev);
3277 
3278 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3279 		    rdev->config.si.max_sh_per_se,
3280 		    rdev->config.si.max_backends_per_se);
3281 
3282 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3283 		     rdev->config.si.max_sh_per_se,
3284 		     rdev->config.si.max_cu_per_sh);
3285 
3286 	rdev->config.si.active_cus = 0;
3287 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3288 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3289 			rdev->config.si.active_cus +=
3290 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3291 		}
3292 	}
3293 
3294 	/* set HW defaults for 3D engine */
3295 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3296 				     ROQ_IB2_START(0x2b)));
3297 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3298 
3299 	sx_debug_1 = RREG32(SX_DEBUG_1);
3300 	WREG32(SX_DEBUG_1, sx_debug_1);
3301 
3302 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3303 
3304 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3305 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3306 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3307 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3308 
3309 	WREG32(VGT_NUM_INSTANCES, 1);
3310 
3311 	WREG32(CP_PERFMON_CNTL, 0);
3312 
3313 	WREG32(SQ_CONFIG, 0);
3314 
3315 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3316 					  FORCE_EOV_MAX_REZ_CNT(255)));
3317 
3318 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3319 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3320 
3321 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3322 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3323 
3324 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3325 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3326 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3327 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3328 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3329 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3330 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3331 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3332 
3333 	tmp = RREG32(HDP_MISC_CNTL);
3334 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3335 	WREG32(HDP_MISC_CNTL, tmp);
3336 
3337 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3338 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3339 
3340 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3341 
3342 	udelay(50);
3343 }
3344 
3345 /*
3346  * GPU scratch registers helpers function.
3347  */
3348 static void si_scratch_init(struct radeon_device *rdev)
3349 {
3350 	int i;
3351 
3352 	rdev->scratch.num_reg = 7;
3353 	rdev->scratch.reg_base = SCRATCH_REG0;
3354 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3355 		rdev->scratch.free[i] = true;
3356 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3357 	}
3358 }
3359 
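/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Flushes the read caches over the GART with a SURFACE_SYNC packet,
 * then emits an EVENT_WRITE_EOP that writes the fence sequence
 * number and raises an interrupt once prior work completes (SI).
 */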
3360 void si_fence_ring_emit(struct radeon_device *rdev,
3361 			struct radeon_fence *fence)
3362 {
3363 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3364 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3365 
3366 	/* flush read cache over gart */
3367 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3368 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3369 	radeon_ring_write(ring, 0);
3370 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3371 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3372 			  PACKET3_TC_ACTION_ENA |
3373 			  PACKET3_SH_KCACHE_ACTION_ENA |
3374 			  PACKET3_SH_ICACHE_ACTION_ENA);
3375 	radeon_ring_write(ring, 0xFFFFFFFF);
3376 	radeon_ring_write(ring, 0);
3377 	radeon_ring_write(ring, 10); /* poll interval */
3378 	/* EVENT_WRITE_EOP - flush caches, send int */
3379 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3380 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3381 	radeon_ring_write(ring, lower_32_bits(addr));
3382 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3383 	radeon_ring_write(ring, fence->seq);
3384 	radeon_ring_write(ring, 0);
3385 }
3386 
3387 /*
3388  * IB stuff
3389  */
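/**
 * si_ring_ib_execute - schedule an IB on the gfx/compute rings
 *
 * @rdev: radeon_device pointer
 * @ib: IB to schedule
 *
 * Const IBs are chained with INDIRECT_BUFFER_CONST behind a
 * SWITCH_BUFFER packet; normal IBs save the next read pointer, are
 * chained with INDIRECT_BUFFER and followed by a read cache flush
 * for the IB's vm id (SI).
 */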
3390 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3391 {
3392 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3393 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3394 	u32 header;
3395 
3396 	if (ib->is_const_ib) {
3397 		/* set switch buffer packet before const IB */
3398 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3399 		radeon_ring_write(ring, 0);
3400 
3401 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3402 	} else {
3403 		u32 next_rptr;
3404 		if (ring->rptr_save_reg) {
3405 			next_rptr = ring->wptr + 3 + 4 + 8;
3406 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3407 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3408 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3409 			radeon_ring_write(ring, next_rptr);
3410 		} else if (rdev->wb.enabled) {
3411 			next_rptr = ring->wptr + 5 + 4 + 8;
3412 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3413 			radeon_ring_write(ring, (1 << 8));
3414 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3415 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3416 			radeon_ring_write(ring, next_rptr);
3417 		}
3418 
3419 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3420 	}
3421 
3422 	radeon_ring_write(ring, header);
3423 	radeon_ring_write(ring,
3424 #ifdef __BIG_ENDIAN
3425 			  (2 << 0) |
3426 #endif
3427 			  (ib->gpu_addr & 0xFFFFFFFC));
3428 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3429 	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3430 
3431 	if (!ib->is_const_ib) {
3432 		/* flush read cache over gart for this vmid */
3433 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3434 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3435 		radeon_ring_write(ring, vm_id);
3436 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3437 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3438 				  PACKET3_TC_ACTION_ENA |
3439 				  PACKET3_SH_KCACHE_ACTION_ENA |
3440 				  PACKET3_SH_ICACHE_ACTION_ENA);
3441 		radeon_ring_write(ring, 0xFFFFFFFF);
3442 		radeon_ring_write(ring, 0);
3443 		radeon_ring_write(ring, 10); /* poll interval */
3444 	}
3445 }
3446 
3447 /*
3448  * CP.
3449  */
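/**
 * si_cp_enable - enable/disable the CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: true to run the MEs, false to halt them
 *
 * On disable this halts the PFP, ME and CE, clears the scratch
 * register write-back mask and marks all three CP rings not ready.
 */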
3450 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3451 {
3452 	if (enable)
3453 		WREG32(CP_ME_CNTL, 0);
3454 	else {
3455 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3456 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3457 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3458 		WREG32(SCRATCH_UMSK, 0);
3459 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3460 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3461 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3462 	}
3463 	udelay(50);
3464 }
3465 
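/**
 * si_cp_load_microcode - load the PFP, CE and ME microcode
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP and streams the ucode words into the PFP/CE ucode
 * and ME RAM ports, handling both the header-prefixed firmware
 * layout (new_fw) and the legacy big-endian blobs.
 * Returns 0 on success, -EINVAL if any firmware is missing.
 */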
3466 static int si_cp_load_microcode(struct radeon_device *rdev)
3467 {
3468 	int i;
3469 
3470 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3471 		return -EINVAL;
3472 
3473 	si_cp_enable(rdev, false);
3474 
3475 	if (rdev->new_fw) {
3476 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3477 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3478 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3479 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3480 		const struct gfx_firmware_header_v1_0 *me_hdr =
3481 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3482 		const __le32 *fw_data;
3483 		u32 fw_size;
3484 
3485 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3486 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3487 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3488 
3489 		/* PFP */
3490 		fw_data = (const __le32 *)
3491 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3492 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3493 		WREG32(CP_PFP_UCODE_ADDR, 0);
3494 		for (i = 0; i < fw_size; i++)
3495 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3496 		WREG32(CP_PFP_UCODE_ADDR, 0);
3497 
3498 		/* CE */
3499 		fw_data = (const __le32 *)
3500 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3501 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3502 		WREG32(CP_CE_UCODE_ADDR, 0);
3503 		for (i = 0; i < fw_size; i++)
3504 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3505 		WREG32(CP_CE_UCODE_ADDR, 0);
3506 
3507 		/* ME */
3508 		fw_data = (const __le32 *)
3509 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3510 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3511 		WREG32(CP_ME_RAM_WADDR, 0);
3512 		for (i = 0; i < fw_size; i++)
3513 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3514 		WREG32(CP_ME_RAM_WADDR, 0);
3515 	} else {
3516 		const __be32 *fw_data;
3517 
3518 		/* PFP */
3519 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3520 		WREG32(CP_PFP_UCODE_ADDR, 0);
3521 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3522 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3523 		WREG32(CP_PFP_UCODE_ADDR, 0);
3524 
3525 		/* CE */
3526 		fw_data = (const __be32 *)rdev->ce_fw->data;
3527 		WREG32(CP_CE_UCODE_ADDR, 0);
3528 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3529 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3530 		WREG32(CP_CE_UCODE_ADDR, 0);
3531 
3532 		/* ME */
3533 		fw_data = (const __be32 *)rdev->me_fw->data;
3534 		WREG32(CP_ME_RAM_WADDR, 0);
3535 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3536 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3537 		WREG32(CP_ME_RAM_WADDR, 0);
3538 	}
3539 
3540 	WREG32(CP_PFP_UCODE_ADDR, 0);
3541 	WREG32(CP_CE_UCODE_ADDR, 0);
3542 	WREG32(CP_ME_RAM_WADDR, 0);
3543 	WREG32(CP_ME_RAM_RADDR, 0);
3544 	return 0;
3545 }
3546 
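/**
 * si_cp_start - initialize the CP and bring up the rings
 *
 * @rdev: radeon_device pointer
 *
 * Emits ME_INITIALIZE, sets up the CE RAM partitions, loads the
 * golden context (si_default_state) on the gfx ring and clears the
 * context state on the two compute rings.
 */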
3547 static int si_cp_start(struct radeon_device *rdev)
3548 {
3549 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3550 	int r, i;
3551 
3552 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3553 	if (r) {
3554 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3555 		return r;
3556 	}
3557 	/* init the CP */
3558 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3559 	radeon_ring_write(ring, 0x1);
3560 	radeon_ring_write(ring, 0x0);
3561 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3562 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3563 	radeon_ring_write(ring, 0);
3564 	radeon_ring_write(ring, 0);
3565 
3566 	/* init the CE partitions */
3567 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3568 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3569 	radeon_ring_write(ring, 0xc000);
3570 	radeon_ring_write(ring, 0xe000);
3571 	radeon_ring_unlock_commit(rdev, ring, false);
3572 
3573 	si_cp_enable(rdev, true);
3574 
3575 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3576 	if (r) {
3577 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3578 		return r;
3579 	}
3580 
3581 	/* setup clear context state */
3582 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3583 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3584 
3585 	for (i = 0; i < si_default_size; i++)
3586 		radeon_ring_write(ring, si_default_state[i]);
3587 
3588 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3589 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3590 
3591 	/* set clear context state */
3592 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3593 	radeon_ring_write(ring, 0);
3594 
3595 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3596 	radeon_ring_write(ring, 0x00000316);
3597 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3598 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3599 
3600 	radeon_ring_unlock_commit(rdev, ring, false);
3601 
3602 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3603 		ring = &rdev->ring[i];
3604 		r = radeon_ring_lock(rdev, ring, 2);
		if (r) {
			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
			return r;
		}
3605 
3606 		/* clear the compute context state */
3607 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3608 		radeon_ring_write(ring, 0);
3609 
3610 		radeon_ring_unlock_commit(rdev, ring, false);
3611 	}
3612 
3613 	return 0;
3614 }
3615 
3616 static void si_cp_fini(struct radeon_device *rdev)
3617 {
3618 	struct radeon_ring *ring;
3619 	si_cp_enable(rdev, false);
3620 
3621 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3622 	radeon_ring_fini(rdev, ring);
3623 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3624 
3625 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3626 	radeon_ring_fini(rdev, ring);
3627 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3628 
3629 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3630 	radeon_ring_fini(rdev, ring);
3631 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3632 }
3633 
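/**
 * si_cp_resume - program and start the three CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Programs size, read/write pointers, write-back address and base
 * for CP_RB0..CP_RB2, starts the rings via si_cp_start() and ring
 * tests each one. Only a gfx ring test failure is fatal; a failed
 * compute ring is simply left marked not ready.
 */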
3634 static int si_cp_resume(struct radeon_device *rdev)
3635 {
3636 	struct radeon_ring *ring;
3637 	u32 tmp;
3638 	u32 rb_bufsz;
3639 	int r;
3640 
3641 	si_enable_gui_idle_interrupt(rdev, false);
3642 
3643 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3644 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3645 
3646 	/* Set the write pointer delay */
3647 	WREG32(CP_RB_WPTR_DELAY, 0);
3648 
3649 	WREG32(CP_DEBUG, 0);
3650 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3651 
3652 	/* ring 0 - compute and gfx */
3653 	/* Set ring buffer size */
3654 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3655 	rb_bufsz = order_base_2(ring->ring_size / 8);
3656 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3657 #ifdef __BIG_ENDIAN
3658 	tmp |= BUF_SWAP_32BIT;
3659 #endif
3660 	WREG32(CP_RB0_CNTL, tmp);
3661 
3662 	/* Initialize the ring buffer's read and write pointers */
3663 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3664 	ring->wptr = 0;
3665 	WREG32(CP_RB0_WPTR, ring->wptr);
3666 
3667 	/* set the wb address whether it's enabled or not */
3668 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3669 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3670 
3671 	if (rdev->wb.enabled)
3672 		WREG32(SCRATCH_UMSK, 0xff);
3673 	else {
3674 		tmp |= RB_NO_UPDATE;
3675 		WREG32(SCRATCH_UMSK, 0);
3676 	}
3677 
3678 	mdelay(1);
3679 	WREG32(CP_RB0_CNTL, tmp);
3680 
3681 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3682 
3683 	/* ring1 - compute only */
3684 	/* Set ring buffer size */
3685 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3686 	rb_bufsz = order_base_2(ring->ring_size / 8);
3687 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3688 #ifdef __BIG_ENDIAN
3689 	tmp |= BUF_SWAP_32BIT;
3690 #endif
3691 	WREG32(CP_RB1_CNTL, tmp);
3692 
3693 	/* Initialize the ring buffer's read and write pointers */
3694 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3695 	ring->wptr = 0;
3696 	WREG32(CP_RB1_WPTR, ring->wptr);
3697 
3698 	/* set the wb address whether it's enabled or not */
3699 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3700 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3701 
3702 	mdelay(1);
3703 	WREG32(CP_RB1_CNTL, tmp);
3704 
3705 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3706 
3707 	/* ring2 - compute only */
3708 	/* Set ring buffer size */
3709 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3710 	rb_bufsz = order_base_2(ring->ring_size / 8);
3711 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3712 #ifdef __BIG_ENDIAN
3713 	tmp |= BUF_SWAP_32BIT;
3714 #endif
3715 	WREG32(CP_RB2_CNTL, tmp);
3716 
3717 	/* Initialize the ring buffer's read and write pointers */
3718 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3719 	ring->wptr = 0;
3720 	WREG32(CP_RB2_WPTR, ring->wptr);
3721 
3722 	/* set the wb address whether it's enabled or not */
3723 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3724 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3725 
3726 	mdelay(1);
3727 	WREG32(CP_RB2_CNTL, tmp);
3728 
3729 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3730 
3731 	/* start the rings */
3732 	si_cp_start(rdev);
3733 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3734 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3735 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3736 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3737 	if (r) {
3738 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3739 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3740 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3741 		return r;
3742 	}
3743 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3744 	if (r) {
3745 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3746 	}
3747 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3748 	if (r) {
3749 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3750 	}
3751 
3752 	si_enable_gui_idle_interrupt(rdev, true);
3753 
3754 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3755 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3756 
3757 	return 0;
3758 }
3759 
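/**
 * si_gpu_check_soft_reset - determine which blocks are hung
 *
 * @rdev: radeon_device pointer
 *
 * Samples the GRBM, SRBM, DMA and VM L2 status registers and
 * returns a RADEON_RESET_* mask of the engines that still report
 * busy. A busy MC is deliberately dropped from the mask since it
 * is most likely just busy rather than hung.
 */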
3760 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3761 {
3762 	u32 reset_mask = 0;
3763 	u32 tmp;
3764 
3765 	/* GRBM_STATUS */
3766 	tmp = RREG32(GRBM_STATUS);
3767 	if (tmp & (PA_BUSY | SC_BUSY |
3768 		   BCI_BUSY | SX_BUSY |
3769 		   TA_BUSY | VGT_BUSY |
3770 		   DB_BUSY | CB_BUSY |
3771 		   GDS_BUSY | SPI_BUSY |
3772 		   IA_BUSY | IA_BUSY_NO_DMA))
3773 		reset_mask |= RADEON_RESET_GFX;
3774 
3775 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3776 		   CP_BUSY | CP_COHERENCY_BUSY))
3777 		reset_mask |= RADEON_RESET_CP;
3778 
3779 	if (tmp & GRBM_EE_BUSY)
3780 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3781 
3782 	/* GRBM_STATUS2 */
3783 	tmp = RREG32(GRBM_STATUS2);
3784 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3785 		reset_mask |= RADEON_RESET_RLC;
3786 
3787 	/* DMA_STATUS_REG 0 */
3788 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3789 	if (!(tmp & DMA_IDLE))
3790 		reset_mask |= RADEON_RESET_DMA;
3791 
3792 	/* DMA_STATUS_REG 1 */
3793 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3794 	if (!(tmp & DMA_IDLE))
3795 		reset_mask |= RADEON_RESET_DMA1;
3796 
3797 	/* SRBM_STATUS2 */
3798 	tmp = RREG32(SRBM_STATUS2);
3799 	if (tmp & DMA_BUSY)
3800 		reset_mask |= RADEON_RESET_DMA;
3801 
3802 	if (tmp & DMA1_BUSY)
3803 		reset_mask |= RADEON_RESET_DMA1;
3804 
3805 	/* SRBM_STATUS */
3806 	tmp = RREG32(SRBM_STATUS);
3807 
3808 	if (tmp & IH_BUSY)
3809 		reset_mask |= RADEON_RESET_IH;
3810 
3811 	if (tmp & SEM_BUSY)
3812 		reset_mask |= RADEON_RESET_SEM;
3813 
3814 	if (tmp & GRBM_RQ_PENDING)
3815 		reset_mask |= RADEON_RESET_GRBM;
3816 
3817 	if (tmp & VMC_BUSY)
3818 		reset_mask |= RADEON_RESET_VMC;
3819 
3820 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3821 		   MCC_BUSY | MCD_BUSY))
3822 		reset_mask |= RADEON_RESET_MC;
3823 
3824 	if (evergreen_is_display_hung(rdev))
3825 		reset_mask |= RADEON_RESET_DISPLAY;
3826 
3827 	/* VM_L2_STATUS */
3828 	tmp = RREG32(VM_L2_STATUS);
3829 	if (tmp & L2_BUSY)
3830 		reset_mask |= RADEON_RESET_VMC;
3831 
3832 	/* Skip MC reset as it's most likely not hung, just busy */
3833 	if (reset_mask & RADEON_RESET_MC) {
3834 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3835 		reset_mask &= ~RADEON_RESET_MC;
3836 	}
3837 
3838 	return reset_mask;
3839 }
3840 
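/**
 * si_gpu_soft_reset - soft reset the blocks named in reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* mask of blocks to reset
 *
 * Disables PG/CG, halts the RLC, CP and DMA engines and stops the
 * MC, then pulses the matching GRBM/SRBM soft reset bits before
 * resuming the MC.
 */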
3841 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3842 {
3843 	struct evergreen_mc_save save;
3844 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3845 	u32 tmp;
3846 
3847 	if (reset_mask == 0)
3848 		return;
3849 
3850 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3851 
3852 	evergreen_print_gpu_status_regs(rdev);
3853 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3854 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3855 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3856 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3857 
3858 	/* disable PG/CG */
3859 	si_fini_pg(rdev);
3860 	si_fini_cg(rdev);
3861 
3862 	/* stop the rlc */
3863 	si_rlc_stop(rdev);
3864 
3865 	/* Disable CP parsing/prefetching */
3866 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3867 
3868 	if (reset_mask & RADEON_RESET_DMA) {
3869 		/* dma0 */
3870 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3871 		tmp &= ~DMA_RB_ENABLE;
3872 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3873 	}
3874 	if (reset_mask & RADEON_RESET_DMA1) {
3875 		/* dma1 */
3876 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3877 		tmp &= ~DMA_RB_ENABLE;
3878 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3879 	}
3880 
3881 	udelay(50);
3882 
3883 	evergreen_mc_stop(rdev, &save);
3884 	if (evergreen_mc_wait_for_idle(rdev)) {
3885 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3886 	}
3887 
3888 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3889 		grbm_soft_reset = SOFT_RESET_CB |
3890 			SOFT_RESET_DB |
3891 			SOFT_RESET_GDS |
3892 			SOFT_RESET_PA |
3893 			SOFT_RESET_SC |
3894 			SOFT_RESET_BCI |
3895 			SOFT_RESET_SPI |
3896 			SOFT_RESET_SX |
3897 			SOFT_RESET_TC |
3898 			SOFT_RESET_TA |
3899 			SOFT_RESET_VGT |
3900 			SOFT_RESET_IA;
3901 	}
3902 
3903 	if (reset_mask & RADEON_RESET_CP) {
3904 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3905 
3906 		srbm_soft_reset |= SOFT_RESET_GRBM;
3907 	}
3908 
3909 	if (reset_mask & RADEON_RESET_DMA)
3910 		srbm_soft_reset |= SOFT_RESET_DMA;
3911 
3912 	if (reset_mask & RADEON_RESET_DMA1)
3913 		srbm_soft_reset |= SOFT_RESET_DMA1;
3914 
3915 	if (reset_mask & RADEON_RESET_DISPLAY)
3916 		srbm_soft_reset |= SOFT_RESET_DC;
3917 
3918 	if (reset_mask & RADEON_RESET_RLC)
3919 		grbm_soft_reset |= SOFT_RESET_RLC;
3920 
3921 	if (reset_mask & RADEON_RESET_SEM)
3922 		srbm_soft_reset |= SOFT_RESET_SEM;
3923 
3924 	if (reset_mask & RADEON_RESET_IH)
3925 		srbm_soft_reset |= SOFT_RESET_IH;
3926 
3927 	if (reset_mask & RADEON_RESET_GRBM)
3928 		srbm_soft_reset |= SOFT_RESET_GRBM;
3929 
3930 	if (reset_mask & RADEON_RESET_VMC)
3931 		srbm_soft_reset |= SOFT_RESET_VMC;
3932 
3933 	if (reset_mask & RADEON_RESET_MC)
3934 		srbm_soft_reset |= SOFT_RESET_MC;
3935 
3936 	if (grbm_soft_reset) {
3937 		tmp = RREG32(GRBM_SOFT_RESET);
3938 		tmp |= grbm_soft_reset;
3939 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3940 		WREG32(GRBM_SOFT_RESET, tmp);
3941 		tmp = RREG32(GRBM_SOFT_RESET);
3942 
3943 		udelay(50);
3944 
3945 		tmp &= ~grbm_soft_reset;
3946 		WREG32(GRBM_SOFT_RESET, tmp);
3947 		tmp = RREG32(GRBM_SOFT_RESET);
3948 	}
3949 
3950 	if (srbm_soft_reset) {
3951 		tmp = RREG32(SRBM_SOFT_RESET);
3952 		tmp |= srbm_soft_reset;
3953 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3954 		WREG32(SRBM_SOFT_RESET, tmp);
3955 		tmp = RREG32(SRBM_SOFT_RESET);
3956 
3957 		udelay(50);
3958 
3959 		tmp &= ~srbm_soft_reset;
3960 		WREG32(SRBM_SOFT_RESET, tmp);
3961 		tmp = RREG32(SRBM_SOFT_RESET);
3962 	}
3963 
3964 	/* Wait a little for things to settle down */
3965 	udelay(50);
3966 
3967 	evergreen_mc_resume(rdev, &save);
3968 	udelay(50);
3969 
3970 	evergreen_print_gpu_status_regs(rdev);
3971 }
3972 
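/**
 * si_set_clk_bypass_mode - switch sclk/mclk to bypass
 *
 * @rdev: radeon_device pointer
 *
 * Forces the engine and memory clocks onto the reference clock so
 * the PLLs can be safely powered down before a pci config reset.
 */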
3973 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3974 {
3975 	u32 tmp, i;
3976 
3977 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3978 	tmp |= SPLL_BYPASS_EN;
3979 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3980 
3981 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3982 	tmp |= SPLL_CTLREQ_CHG;
3983 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3984 
3985 	for (i = 0; i < rdev->usec_timeout; i++) {
3986 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3987 			break;
3988 		udelay(1);
3989 	}
3990 
3991 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3992 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3993 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3994 
3995 	tmp = RREG32(MPLL_CNTL_MODE);
3996 	tmp &= ~MPLL_MCLK_SEL;
3997 	WREG32(MPLL_CNTL_MODE, tmp);
3998 }
3999 
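/**
 * si_spll_powerdown - put the SPLL to sleep
 *
 * @rdev: radeon_device pointer
 *
 * Takes software control of the SPLL, asserts reset and sleep, and
 * then hands control back to the hardware state machine.
 */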
4000 static void si_spll_powerdown(struct radeon_device *rdev)
4001 {
4002 	u32 tmp;
4003 
4004 	tmp = RREG32(SPLL_CNTL_MODE);
4005 	tmp |= SPLL_SW_DIR_CONTROL;
4006 	WREG32(SPLL_CNTL_MODE, tmp);
4007 
4008 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4009 	tmp |= SPLL_RESET;
4010 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4011 
4012 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4013 	tmp |= SPLL_SLEEP;
4014 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4015 
4016 	tmp = RREG32(SPLL_CNTL_MODE);
4017 	tmp &= ~SPLL_SW_DIR_CONTROL;
4018 	WREG32(SPLL_CNTL_MODE, tmp);
4019 }
4020 
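/**
 * si_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Quiesces the CP, DMA and RLC, stops memory access, drops the
 * clocks to bypass, powers down the SPLL and disables bus
 * mastering, then triggers the reset and polls CONFIG_MEMSIZE
 * until the asic comes back.
 */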
4021 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4022 {
4023 	struct evergreen_mc_save save;
4024 	u32 tmp, i;
4025 
4026 	dev_info(rdev->dev, "GPU pci config reset\n");
4027 
4028 	/* disable dpm? */
4029 
4030 	/* disable cg/pg */
4031 	si_fini_pg(rdev);
4032 	si_fini_cg(rdev);
4033 
4034 	/* Disable CP parsing/prefetching */
4035 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4036 	/* dma0 */
4037 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4038 	tmp &= ~DMA_RB_ENABLE;
4039 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4040 	/* dma1 */
4041 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4042 	tmp &= ~DMA_RB_ENABLE;
4043 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4044 	/* XXX other engines? */
4045 
4046 	/* halt the rlc, disable cp internal ints */
4047 	si_rlc_stop(rdev);
4048 
4049 	udelay(50);
4050 
4051 	/* disable mem access */
4052 	evergreen_mc_stop(rdev, &save);
4053 	if (evergreen_mc_wait_for_idle(rdev)) {
4054 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4055 	}
4056 
4057 	/* set mclk/sclk to bypass */
4058 	si_set_clk_bypass_mode(rdev);
4059 	/* powerdown spll */
4060 	si_spll_powerdown(rdev);
4061 	/* disable BM */
4062 	pci_clear_master(rdev->pdev);
4063 	/* reset */
4064 	radeon_pci_config_reset(rdev);
4065 	/* wait for asic to come out of reset */
4066 	for (i = 0; i < rdev->usec_timeout; i++) {
4067 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4068 			break;
4069 		udelay(1);
4070 	}
4071 }
4072 
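/**
 * si_asic_reset - attempt to reset the asic
 *
 * @rdev: radeon_device pointer
 * @hard: go straight to a pci config reset
 *
 * Tries a targeted soft reset first and escalates to a pci config
 * reset if blocks still report busy and radeon_hard_reset is set.
 * The bios scratch "engine hung" flag is set while a reset is
 * pending and cleared once the GPU reports idle.
 */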
4073 int si_asic_reset(struct radeon_device *rdev, bool hard)
4074 {
4075 	u32 reset_mask;
4076 
4077 	if (hard) {
4078 		si_gpu_pci_config_reset(rdev);
4079 		return 0;
4080 	}
4081 
4082 	reset_mask = si_gpu_check_soft_reset(rdev);
4083 
4084 	if (reset_mask)
4085 		r600_set_bios_scratch_engine_hung(rdev, true);
4086 
4087 	/* try soft reset */
4088 	si_gpu_soft_reset(rdev, reset_mask);
4089 
4090 	reset_mask = si_gpu_check_soft_reset(rdev);
4091 
4092 	/* try pci config reset */
4093 	if (reset_mask && radeon_hard_reset)
4094 		si_gpu_pci_config_reset(rdev);
4095 
4096 	reset_mask = si_gpu_check_soft_reset(rdev);
4097 
4098 	if (!reset_mask)
4099 		r600_set_bios_scratch_engine_hung(rdev, false);
4100 
4101 	return 0;
4102 }
4103 
4104 /**
4105  * si_gfx_is_lockup - Check if the GFX engine is locked up
4106  *
4107  * @rdev: radeon_device pointer
4108  * @ring: radeon_ring structure holding ring information
4109  *
4110  * Check if the GFX engine is locked up.
4111  * Returns true if the engine appears to be locked up, false if not.
4112  */
4113 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4114 {
4115 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4116 
4117 	if (!(reset_mask & (RADEON_RESET_GFX |
4118 			    RADEON_RESET_COMPUTE |
4119 			    RADEON_RESET_CP))) {
4120 		radeon_ring_lockup_update(rdev, ring);
4121 		return false;
4122 	}
4123 	return radeon_ring_test_lockup(rdev, ring);
4124 }
4125 
4126 /* MC */
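/**
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Stops the MC, locks out VGA access, points the system and FB
 * apertures at the current VRAM layout, parks the unused AGP
 * aperture and resumes the MC with the VGA renderer disabled.
 */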
4127 static void si_mc_program(struct radeon_device *rdev)
4128 {
4129 	struct evergreen_mc_save save;
4130 	u32 tmp;
4131 	int i, j;
4132 
4133 	/* Initialize HDP */
4134 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4135 		WREG32((0x2c14 + j), 0x00000000);
4136 		WREG32((0x2c18 + j), 0x00000000);
4137 		WREG32((0x2c1c + j), 0x00000000);
4138 		WREG32((0x2c20 + j), 0x00000000);
4139 		WREG32((0x2c24 + j), 0x00000000);
4140 	}
4141 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4142 
4143 	evergreen_mc_stop(rdev, &save);
4144 	if (radeon_mc_wait_for_idle(rdev)) {
4145 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4146 	}
4147 	if (!ASIC_IS_NODCE(rdev))
4148 		/* Lock out access through VGA aperture */
4149 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4150 	/* Update configuration */
4151 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4152 	       rdev->mc.vram_start >> 12);
4153 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4154 	       rdev->mc.vram_end >> 12);
4155 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4156 	       rdev->vram_scratch.gpu_addr >> 12);
4157 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4158 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4159 	WREG32(MC_VM_FB_LOCATION, tmp);
4160 	/* XXX double check these! */
4161 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4162 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4163 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4164 	WREG32(MC_VM_AGP_BASE, 0);
4165 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4166 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4167 	if (radeon_mc_wait_for_idle(rdev)) {
4168 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4169 	}
4170 	evergreen_mc_resume(rdev, &save);
4171 	if (!ASIC_IS_NODCE(rdev)) {
4172 		/* we need to own VRAM, so turn off the VGA renderer here
4173 		 * to stop it overwriting our objects */
4174 		rv515_vga_render_disable(rdev);
4175 	}
4176 }
4177 
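/**
 * si_vram_gtt_location - place VRAM and GTT in the GPU address space
 *
 * @rdev: radeon_device pointer
 * @mc: memory controller structure holding the layout
 *
 * Caps addressable VRAM so that at least 1024M remains for the
 * GTT, then lets the common helpers pick the VRAM and GTT bases.
 */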
4178 void si_vram_gtt_location(struct radeon_device *rdev,
4179 			  struct radeon_mc *mc)
4180 {
4181 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4182 		/* leave room for at least 1024M GTT */
4183 		dev_warn(rdev->dev, "limiting VRAM\n");
4184 		mc->real_vram_size = 0xFFC0000000ULL;
4185 		mc->mc_vram_size = 0xFFC0000000ULL;
4186 	}
4187 	radeon_vram_location(rdev, &rdev->mc, 0);
4188 	rdev->mc.gtt_base_align = 0;
4189 	radeon_gtt_location(rdev, mc);
4190 }
4191 
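/**
 * si_mc_init - query VRAM size and width
 *
 * @rdev: radeon_device pointer
 *
 * Derives the memory bus width from the channel size and count,
 * reads the VRAM size in MB from CONFIG_MEMSIZE (guarding against
 * garbage in the upper 16 bits) and places the VRAM and GTT
 * apertures. Returns 0.
 */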
4192 static int si_mc_init(struct radeon_device *rdev)
4193 {
4194 	u32 tmp;
4195 	int chansize, numchan;
4196 
4197 	/* Get VRAM information */
4198 	rdev->mc.vram_is_ddr = true;
4199 	tmp = RREG32(MC_ARB_RAMCFG);
4200 	if (tmp & CHANSIZE_OVERRIDE) {
4201 		chansize = 16;
4202 	} else if (tmp & CHANSIZE_MASK) {
4203 		chansize = 64;
4204 	} else {
4205 		chansize = 32;
4206 	}
4207 	tmp = RREG32(MC_SHARED_CHMAP);
4208 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4209 	case 0:
4210 	default:
4211 		numchan = 1;
4212 		break;
4213 	case 1:
4214 		numchan = 2;
4215 		break;
4216 	case 2:
4217 		numchan = 4;
4218 		break;
4219 	case 3:
4220 		numchan = 8;
4221 		break;
4222 	case 4:
4223 		numchan = 3;
4224 		break;
4225 	case 5:
4226 		numchan = 6;
4227 		break;
4228 	case 6:
4229 		numchan = 10;
4230 		break;
4231 	case 7:
4232 		numchan = 12;
4233 		break;
4234 	case 8:
4235 		numchan = 16;
4236 		break;
4237 	}
4238 	rdev->mc.vram_width = numchan * chansize;
4239 	/* Could aper size report 0 ? */
4240 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4241 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4242 	/* size in MB on si */
4243 	tmp = RREG32(CONFIG_MEMSIZE);
4244 	/* some boards may have garbage in the upper 16 bits */
4245 	if (tmp & 0xffff0000) {
4246 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4247 		if (tmp & 0xffff)
4248 			tmp &= 0xffff;
4249 	}
4250 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4251 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4252 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4253 	si_vram_gtt_location(rdev, &rdev->mc);
4254 	radeon_update_bandwidth_info(rdev);
4255 
4256 	return 0;
4257 }
4258 
4259 /*
4260  * GART
4261  */
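/**
 * si_pcie_gart_tlb_flush - flush the GART TLB
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache and invalidates VM context 0, which maps
 * the GART aperture.
 */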
4262 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4263 {
4264 	/* flush hdp cache */
4265 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4266 
4267 	/* bits 0-15 are the VM contexts0-15 */
4268 	WREG32(VM_INVALIDATE_REQUEST, 1);
4269 }
4270 
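/**
 * si_pcie_gart_enable - set up the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART table in VRAM, programs the L1 TLB and VM L2
 * cache, maps context 0 over the GTT aperture and enables the
 * per-process contexts 1-15 with protection faults redirected to
 * the dummy page. Returns 0 on success.
 */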
4271 static int si_pcie_gart_enable(struct radeon_device *rdev)
4272 {
4273 	int r, i;
4274 
4275 	if (rdev->gart.robj == NULL) {
4276 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4277 		return -EINVAL;
4278 	}
4279 	r = radeon_gart_table_vram_pin(rdev);
4280 	if (r)
4281 		return r;
4282 	/* Setup TLB control */
4283 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4284 	       (0xA << 7) |
4285 	       ENABLE_L1_TLB |
4286 	       ENABLE_L1_FRAGMENT_PROCESSING |
4287 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4288 	       ENABLE_ADVANCED_DRIVER_MODEL |
4289 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4290 	/* Setup L2 cache */
4291 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4292 	       ENABLE_L2_FRAGMENT_PROCESSING |
4293 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4294 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4295 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4296 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4297 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4298 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4299 	       BANK_SELECT(4) |
4300 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4301 	/* setup context0 */
4302 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4303 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4304 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4305 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4306 			(u32)(rdev->dummy_page.addr >> 12));
4307 	WREG32(VM_CONTEXT0_CNTL2, 0);
4308 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4309 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4310 
4311 	WREG32(0x15D4, 0);
4312 	WREG32(0x15D8, 0);
4313 	WREG32(0x15DC, 0);
4314 
4315 	/* empty context1-15 */
4316 	/* set vm size, must be a multiple of 4 */
4317 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4318 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4319 	/* Assign the pt base to something valid for now; the pts used for
4320 	 * the VMs are determined by the application and setup and assigned
4321 	 * on the fly in the vm part of radeon_gart.c
4322 	 */
4323 	for (i = 1; i < 16; i++) {
4324 		if (i < 8)
4325 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4326 			       rdev->vm_manager.saved_table_addr[i]);
4327 		else
4328 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4329 			       rdev->vm_manager.saved_table_addr[i]);
4330 	}
4331 
4332 	/* enable context1-15 */
4333 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4334 	       (u32)(rdev->dummy_page.addr >> 12));
4335 	WREG32(VM_CONTEXT1_CNTL2, 4);
4336 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4337 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4338 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4339 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4340 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4341 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4342 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4343 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4344 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4345 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4346 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4347 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4348 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4349 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4350 
4351 	si_pcie_gart_tlb_flush(rdev);
4352 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4353 		 (unsigned)(rdev->mc.gtt_size >> 20),
4354 		 (unsigned long long)rdev->gart.table_addr);
4355 	rdev->gart.ready = true;
4356 	return 0;
4357 }
4358 
4359 static void si_pcie_gart_disable(struct radeon_device *rdev)
4360 {
4361 	unsigned i;
4362 
4363 	for (i = 1; i < 16; ++i) {
4364 		uint32_t reg;
4365 		if (i < 8)
4366 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4367 		else
4368 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4369 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4370 	}
4371 
4372 	/* Disable all tables */
4373 	WREG32(VM_CONTEXT0_CNTL, 0);
4374 	WREG32(VM_CONTEXT1_CNTL, 0);
4375 	/* Setup TLB control */
4376 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4377 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4378 	/* Setup L2 cache */
4379 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4380 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4381 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4382 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4383 	WREG32(VM_L2_CNTL2, 0);
4384 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4385 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4386 	radeon_gart_table_vram_unpin(rdev);
4387 }
4388 
4389 static void si_pcie_gart_fini(struct radeon_device *rdev)
4390 {
4391 	si_pcie_gart_disable(rdev);
4392 	radeon_gart_table_vram_free(rdev);
4393 	radeon_gart_fini(rdev);
4394 }
4395 
4396 /* vm parser */
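/**
 * si_vm_reg_valid - check whether a VM IB may touch a register
 *
 * @reg: register offset to validate
 *
 * Context registers (>= 0x28000) and shader registers
 * (0xB000-0xBFFF) are always allowed; config registers must be on
 * the explicit whitelist below. Returns true if the register may
 * be written from a VM IB.
 */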
4397 static bool si_vm_reg_valid(u32 reg)
4398 {
4399 	/* context regs are fine */
4400 	if (reg >= 0x28000)
4401 		return true;
4402 
4403 	/* shader regs are also fine */
4404 	if (reg >= 0xB000 && reg < 0xC000)
4405 		return true;
4406 
4407 	/* check config regs */
4408 	switch (reg) {
4409 	case GRBM_GFX_INDEX:
4410 	case CP_STRMOUT_CNTL:
4411 	case VGT_VTX_VECT_EJECT_REG:
4412 	case VGT_CACHE_INVALIDATION:
4413 	case VGT_ESGS_RING_SIZE:
4414 	case VGT_GSVS_RING_SIZE:
4415 	case VGT_GS_VERTEX_REUSE:
4416 	case VGT_PRIMITIVE_TYPE:
4417 	case VGT_INDEX_TYPE:
4418 	case VGT_NUM_INDICES:
4419 	case VGT_NUM_INSTANCES:
4420 	case VGT_TF_RING_SIZE:
4421 	case VGT_HS_OFFCHIP_PARAM:
4422 	case VGT_TF_MEMORY_BASE:
4423 	case PA_CL_ENHANCE:
4424 	case PA_SU_LINE_STIPPLE_VALUE:
4425 	case PA_SC_LINE_STIPPLE_STATE:
4426 	case PA_SC_ENHANCE:
4427 	case SQC_CACHES:
4428 	case SPI_STATIC_THREAD_MGMT_1:
4429 	case SPI_STATIC_THREAD_MGMT_2:
4430 	case SPI_STATIC_THREAD_MGMT_3:
4431 	case SPI_PS_MAX_WAVE_ID:
4432 	case SPI_CONFIG_CNTL:
4433 	case SPI_CONFIG_CNTL_1:
4434 	case TA_CNTL_AUX:
4435 	case TA_CS_BC_BASE_ADDR:
4436 		return true;
4437 	default:
4438 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4439 		return false;
4440 	}
4441 }
4442 
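/**
 * si_vm_packet3_ce_check - validate a CE IB packet
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword array
 * @pkt: decoded packet header
 *
 * Const IBs may only carry the small set of CE packets listed
 * below; anything else is rejected with -EINVAL.
 */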
4443 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4444 				  u32 *ib, struct radeon_cs_packet *pkt)
4445 {
4446 	switch (pkt->opcode) {
4447 	case PACKET3_NOP:
4448 	case PACKET3_SET_BASE:
4449 	case PACKET3_SET_CE_DE_COUNTERS:
4450 	case PACKET3_LOAD_CONST_RAM:
4451 	case PACKET3_WRITE_CONST_RAM:
4452 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4453 	case PACKET3_DUMP_CONST_RAM:
4454 	case PACKET3_INCREMENT_CE_COUNTER:
4455 	case PACKET3_WAIT_ON_DE_COUNTER:
4456 	case PACKET3_CE_WRITE:
4457 		break;
4458 	default:
4459 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4460 		return -EINVAL;
4461 	}
4462 	return 0;
4463 }
4464 
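/**
 * si_vm_packet3_cp_dma_check - validate a CP_DMA packet
 *
 * @ib: IB dword array
 * @idx: index of the first dword after the packet header
 *
 * When the CP_DMA source or destination address space is a
 * register, every register in the transfer (or the single register
 * when SAIC/DAIC keeps the address fixed) is run through
 * si_vm_reg_valid().
 */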
4465 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4466 {
4467 	u32 start_reg, reg, i;
4468 	u32 command = ib[idx + 4];
4469 	u32 info = ib[idx + 1];
4470 	u32 idx_value = ib[idx];
4471 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4472 		/* src address space is register */
4473 		if (((info & 0x60000000) >> 29) == 0) {
4474 			start_reg = idx_value << 2;
4475 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4476 				reg = start_reg;
4477 				if (!si_vm_reg_valid(reg)) {
4478 					DRM_ERROR("CP DMA Bad SRC register\n");
4479 					return -EINVAL;
4480 				}
4481 			} else {
4482 				for (i = 0; i < (command & 0x1fffff); i++) {
4483 					reg = start_reg + (4 * i);
4484 					if (!si_vm_reg_valid(reg)) {
4485 						DRM_ERROR("CP DMA Bad SRC register\n");
4486 						return -EINVAL;
4487 					}
4488 				}
4489 			}
4490 		}
4491 	}
4492 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4493 		/* dst address space is register */
4494 		if (((info & 0x00300000) >> 20) == 0) {
4495 			start_reg = ib[idx + 2];
4496 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4497 				reg = start_reg;
4498 				if (!si_vm_reg_valid(reg)) {
4499 					DRM_ERROR("CP DMA Bad DST register\n");
4500 					return -EINVAL;
4501 				}
4502 			} else {
4503 				for (i = 0; i < (command & 0x1fffff); i++) {
4504 					reg = start_reg + (4 * i);
4505 					if (!si_vm_reg_valid(reg)) {
4506 						DRM_ERROR("CP DMA Bad DST register\n");
4507 						return -EINVAL;
4508 					}
4509 				}
4510 			}
4511 		}
4512 	}
4513 	return 0;
4514 }
4515 
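/**
 * si_vm_packet3_gfx_check - validate a gfx ring IB packet
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword array
 * @pkt: decoded packet header
 *
 * Passes the known-safe draw/state packets through unchanged and
 * validates the register destinations of COPY_DATA, WRITE_DATA,
 * COND_WRITE, COPY_DW, SET_CONFIG_REG and CP_DMA.
 */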
4516 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4517 				   u32 *ib, struct radeon_cs_packet *pkt)
4518 {
4519 	int r;
4520 	u32 idx = pkt->idx + 1;
4521 	u32 idx_value = ib[idx];
4522 	u32 start_reg, end_reg, reg, i;
4523 
4524 	switch (pkt->opcode) {
4525 	case PACKET3_NOP:
4526 	case PACKET3_SET_BASE:
4527 	case PACKET3_CLEAR_STATE:
4528 	case PACKET3_INDEX_BUFFER_SIZE:
4529 	case PACKET3_DISPATCH_DIRECT:
4530 	case PACKET3_DISPATCH_INDIRECT:
4531 	case PACKET3_ALLOC_GDS:
4532 	case PACKET3_WRITE_GDS_RAM:
4533 	case PACKET3_ATOMIC_GDS:
4534 	case PACKET3_ATOMIC:
4535 	case PACKET3_OCCLUSION_QUERY:
4536 	case PACKET3_SET_PREDICATION:
4537 	case PACKET3_COND_EXEC:
4538 	case PACKET3_PRED_EXEC:
4539 	case PACKET3_DRAW_INDIRECT:
4540 	case PACKET3_DRAW_INDEX_INDIRECT:
4541 	case PACKET3_INDEX_BASE:
4542 	case PACKET3_DRAW_INDEX_2:
4543 	case PACKET3_CONTEXT_CONTROL:
4544 	case PACKET3_INDEX_TYPE:
4545 	case PACKET3_DRAW_INDIRECT_MULTI:
4546 	case PACKET3_DRAW_INDEX_AUTO:
4547 	case PACKET3_DRAW_INDEX_IMMD:
4548 	case PACKET3_NUM_INSTANCES:
4549 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4550 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4551 	case PACKET3_DRAW_INDEX_OFFSET_2:
4552 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4553 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4554 	case PACKET3_MPEG_INDEX:
4555 	case PACKET3_WAIT_REG_MEM:
4556 	case PACKET3_MEM_WRITE:
4557 	case PACKET3_PFP_SYNC_ME:
4558 	case PACKET3_SURFACE_SYNC:
4559 	case PACKET3_EVENT_WRITE:
4560 	case PACKET3_EVENT_WRITE_EOP:
4561 	case PACKET3_EVENT_WRITE_EOS:
4562 	case PACKET3_SET_CONTEXT_REG:
4563 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4564 	case PACKET3_SET_SH_REG:
4565 	case PACKET3_SET_SH_REG_OFFSET:
4566 	case PACKET3_INCREMENT_DE_COUNTER:
4567 	case PACKET3_WAIT_ON_CE_COUNTER:
4568 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4569 	case PACKET3_ME_WRITE:
4570 		break;
4571 	case PACKET3_COPY_DATA:
4572 		if ((idx_value & 0xf00) == 0) {
4573 			reg = ib[idx + 3] * 4;
4574 			if (!si_vm_reg_valid(reg))
4575 				return -EINVAL;
4576 		}
4577 		break;
4578 	case PACKET3_WRITE_DATA:
4579 		if ((idx_value & 0xf00) == 0) {
4580 			start_reg = ib[idx + 1] * 4;
4581 			if (idx_value & 0x10000) {
4582 				if (!si_vm_reg_valid(start_reg))
4583 					return -EINVAL;
4584 			} else {
4585 				for (i = 0; i < (pkt->count - 2); i++) {
4586 					reg = start_reg + (4 * i);
4587 					if (!si_vm_reg_valid(reg))
4588 						return -EINVAL;
4589 				}
4590 			}
4591 		}
4592 		break;
4593 	case PACKET3_COND_WRITE:
4594 		if (idx_value & 0x100) {
4595 			reg = ib[idx + 5] * 4;
4596 			if (!si_vm_reg_valid(reg))
4597 				return -EINVAL;
4598 		}
4599 		break;
4600 	case PACKET3_COPY_DW:
4601 		if (idx_value & 0x2) {
4602 			reg = ib[idx + 3] * 4;
4603 			if (!si_vm_reg_valid(reg))
4604 				return -EINVAL;
4605 		}
4606 		break;
4607 	case PACKET3_SET_CONFIG_REG:
4608 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4609 		end_reg = 4 * pkt->count + start_reg - 4;
4610 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4611 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4612 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4613 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4614 			return -EINVAL;
4615 		}
4616 		for (i = 0; i < pkt->count; i++) {
4617 			reg = start_reg + (4 * i);
4618 			if (!si_vm_reg_valid(reg))
4619 				return -EINVAL;
4620 		}
4621 		break;
4622 	case PACKET3_CP_DMA:
4623 		r = si_vm_packet3_cp_dma_check(ib, idx);
4624 		if (r)
4625 			return r;
4626 		break;
4627 	default:
4628 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4629 		return -EINVAL;
4630 	}
4631 	return 0;
4632 }
4633 
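/**
 * si_vm_packet3_compute_check - validate a compute ring IB packet
 *
 * @rdev: radeon_device pointer
 * @ib: IB dword array
 * @pkt: decoded packet header
 *
 * Same policy as the gfx checker, restricted to the packets the
 * compute rings may issue.
 */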
4634 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4635 				       u32 *ib, struct radeon_cs_packet *pkt)
4636 {
4637 	int r;
4638 	u32 idx = pkt->idx + 1;
4639 	u32 idx_value = ib[idx];
4640 	u32 start_reg, reg, i;
4641 
4642 	switch (pkt->opcode) {
4643 	case PACKET3_NOP:
4644 	case PACKET3_SET_BASE:
4645 	case PACKET3_CLEAR_STATE:
4646 	case PACKET3_DISPATCH_DIRECT:
4647 	case PACKET3_DISPATCH_INDIRECT:
4648 	case PACKET3_ALLOC_GDS:
4649 	case PACKET3_WRITE_GDS_RAM:
4650 	case PACKET3_ATOMIC_GDS:
4651 	case PACKET3_ATOMIC:
4652 	case PACKET3_OCCLUSION_QUERY:
4653 	case PACKET3_SET_PREDICATION:
4654 	case PACKET3_COND_EXEC:
4655 	case PACKET3_PRED_EXEC:
4656 	case PACKET3_CONTEXT_CONTROL:
4657 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4658 	case PACKET3_WAIT_REG_MEM:
4659 	case PACKET3_MEM_WRITE:
4660 	case PACKET3_PFP_SYNC_ME:
4661 	case PACKET3_SURFACE_SYNC:
4662 	case PACKET3_EVENT_WRITE:
4663 	case PACKET3_EVENT_WRITE_EOP:
4664 	case PACKET3_EVENT_WRITE_EOS:
4665 	case PACKET3_SET_CONTEXT_REG:
4666 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4667 	case PACKET3_SET_SH_REG:
4668 	case PACKET3_SET_SH_REG_OFFSET:
4669 	case PACKET3_INCREMENT_DE_COUNTER:
4670 	case PACKET3_WAIT_ON_CE_COUNTER:
4671 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4672 	case PACKET3_ME_WRITE:
4673 		break;
4674 	case PACKET3_COPY_DATA:
4675 		if ((idx_value & 0xf00) == 0) {
4676 			reg = ib[idx + 3] * 4;
4677 			if (!si_vm_reg_valid(reg))
4678 				return -EINVAL;
4679 		}
4680 		break;
4681 	case PACKET3_WRITE_DATA:
4682 		if ((idx_value & 0xf00) == 0) {
4683 			start_reg = ib[idx + 1] * 4;
4684 			if (idx_value & 0x10000) {
4685 				if (!si_vm_reg_valid(start_reg))
4686 					return -EINVAL;
4687 			} else {
4688 				for (i = 0; i < (pkt->count - 2); i++) {
4689 					reg = start_reg + (4 * i);
4690 					if (!si_vm_reg_valid(reg))
4691 						return -EINVAL;
4692 				}
4693 			}
4694 		}
4695 		break;
4696 	case PACKET3_COND_WRITE:
4697 		if (idx_value & 0x100) {
4698 			reg = ib[idx + 5] * 4;
4699 			if (!si_vm_reg_valid(reg))
4700 				return -EINVAL;
4701 		}
4702 		break;
4703 	case PACKET3_COPY_DW:
4704 		if (idx_value & 0x2) {
4705 			reg = ib[idx + 3] * 4;
4706 			if (!si_vm_reg_valid(reg))
4707 				return -EINVAL;
4708 		}
4709 		break;
4710 	case PACKET3_CP_DMA:
4711 		r = si_vm_packet3_cp_dma_check(ib, idx);
4712 		if (r)
4713 			return r;
4714 		break;
4715 	default:
4716 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4717 		return -EINVAL;
4718 	}
4719 	return 0;
4720 }
4721 
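/**
 * si_ib_parse - validate a VM IB
 *
 * @rdev: radeon_device pointer
 * @ib: IB to validate
 *
 * Walks the IB packet by packet, rejecting type-0 packets outright
 * and dispatching type-3 packets to the CE, gfx or compute checker
 * depending on the ring. The whole IB is dumped on the first
 * invalid packet. Returns 0 if the IB is clean.
 */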
4722 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4723 {
4724 	int ret = 0;
4725 	u32 idx = 0, i;
4726 	struct radeon_cs_packet pkt;
4727 
4728 	do {
4729 		pkt.idx = idx;
4730 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4731 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4732 		pkt.one_reg_wr = 0;
4733 		switch (pkt.type) {
4734 		case RADEON_PACKET_TYPE0:
4735 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4736 			ret = -EINVAL;
4737 			break;
4738 		case RADEON_PACKET_TYPE2:
4739 			idx += 1;
4740 			break;
4741 		case RADEON_PACKET_TYPE3:
4742 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4743 			if (ib->is_const_ib)
4744 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4745 			else {
4746 				switch (ib->ring) {
4747 				case RADEON_RING_TYPE_GFX_INDEX:
4748 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4749 					break;
4750 				case CAYMAN_RING_TYPE_CP1_INDEX:
4751 				case CAYMAN_RING_TYPE_CP2_INDEX:
4752 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4753 					break;
4754 				default:
4755 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4756 					ret = -EINVAL;
4757 					break;
4758 				}
4759 			}
4760 			idx += pkt.count + 2;
4761 			break;
4762 		default:
4763 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4764 			ret = -EINVAL;
4765 			break;
4766 		}
4767 		if (ret) {
4768 			for (i = 0; i < ib->length_dw; i++) {
4769 				if (i == idx)
4770 					printk("\t0x%08x <---\n", ib->ptr[i]);
4771 				else
4772 					printk("\t0x%08x\n", ib->ptr[i]);
4773 			}
4774 			break;
4775 		}
4776 	} while (idx < ib->length_dw);
4777 
4778 	return ret;
4779 }
4780 
4781 /*
4782  * vm
4783  */
4784 int si_vm_init(struct radeon_device *rdev)
4785 {
4786 	/* number of VMs */
4787 	rdev->vm_manager.nvm = 16;
4788 	/* base offset of vram pages */
4789 	rdev->vm_manager.vram_base_offset = 0;
4790 
4791 	return 0;
4792 }
4793 
4794 void si_vm_fini(struct radeon_device *rdev)
4795 {
4796 }
4797 
4798 /**
4799  * si_vm_decode_fault - print human readable fault info
4800  *
4801  * @rdev: radeon_device pointer
4802  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4803  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4804  *
4805  * Print human readable fault information (SI).
4806  */
4807 static void si_vm_decode_fault(struct radeon_device *rdev,
4808 			       u32 status, u32 addr)
4809 {
4810 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4811 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4812 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4813 	char *block;
4814 
4815 	if (rdev->family == CHIP_TAHITI) {
4816 		switch (mc_id) {
4817 		case 160:
4818 		case 144:
4819 		case 96:
4820 		case 80:
4821 		case 224:
4822 		case 208:
4823 		case 32:
4824 		case 16:
4825 			block = "CB";
4826 			break;
4827 		case 161:
4828 		case 145:
4829 		case 97:
4830 		case 81:
4831 		case 225:
4832 		case 209:
4833 		case 33:
4834 		case 17:
4835 			block = "CB_FMASK";
4836 			break;
4837 		case 162:
4838 		case 146:
4839 		case 98:
4840 		case 82:
4841 		case 226:
4842 		case 210:
4843 		case 34:
4844 		case 18:
4845 			block = "CB_CMASK";
4846 			break;
4847 		case 163:
4848 		case 147:
4849 		case 99:
4850 		case 83:
4851 		case 227:
4852 		case 211:
4853 		case 35:
4854 		case 19:
4855 			block = "CB_IMMED";
4856 			break;
4857 		case 164:
4858 		case 148:
4859 		case 100:
4860 		case 84:
4861 		case 228:
4862 		case 212:
4863 		case 36:
4864 		case 20:
4865 			block = "DB";
4866 			break;
4867 		case 165:
4868 		case 149:
4869 		case 101:
4870 		case 85:
4871 		case 229:
4872 		case 213:
4873 		case 37:
4874 		case 21:
4875 			block = "DB_HTILE";
4876 			break;
4877 		case 167:
4878 		case 151:
4879 		case 103:
4880 		case 87:
4881 		case 231:
4882 		case 215:
4883 		case 39:
4884 		case 23:
4885 			block = "DB_STEN";
4886 			break;
4887 		case 72:
4888 		case 68:
4889 		case 64:
4890 		case 8:
4891 		case 4:
4892 		case 0:
4893 		case 136:
4894 		case 132:
4895 		case 128:
4896 		case 200:
4897 		case 196:
4898 		case 192:
4899 			block = "TC";
4900 			break;
4901 		case 112:
4902 		case 48:
4903 			block = "CP";
4904 			break;
4905 		case 49:
4906 		case 177:
4907 		case 50:
4908 		case 178:
4909 			block = "SH";
4910 			break;
4911 		case 53:
4912 		case 190:
4913 			block = "VGT";
4914 			break;
4915 		case 117:
4916 			block = "IH";
4917 			break;
4918 		case 51:
4919 		case 115:
4920 			block = "RLC";
4921 			break;
4922 		case 119:
4923 		case 183:
4924 			block = "DMA0";
4925 			break;
4926 		case 61:
4927 			block = "DMA1";
4928 			break;
4929 		case 248:
4930 		case 120:
4931 			block = "HDP";
4932 			break;
4933 		default:
4934 			block = "unknown";
4935 			break;
4936 		}
4937 	} else {
4938 		switch (mc_id) {
4939 		case 32:
4940 		case 16:
4941 		case 96:
4942 		case 80:
4943 		case 160:
4944 		case 144:
4945 		case 224:
4946 		case 208:
4947 			block = "CB";
4948 			break;
4949 		case 33:
4950 		case 17:
4951 		case 97:
4952 		case 81:
4953 		case 161:
4954 		case 145:
4955 		case 225:
4956 		case 209:
4957 			block = "CB_FMASK";
4958 			break;
4959 		case 34:
4960 		case 18:
4961 		case 98:
4962 		case 82:
4963 		case 162:
4964 		case 146:
4965 		case 226:
4966 		case 210:
4967 			block = "CB_CMASK";
4968 			break;
4969 		case 35:
4970 		case 19:
4971 		case 99:
4972 		case 83:
4973 		case 163:
4974 		case 147:
4975 		case 227:
4976 		case 211:
4977 			block = "CB_IMMED";
4978 			break;
4979 		case 36:
4980 		case 20:
4981 		case 100:
4982 		case 84:
4983 		case 164:
4984 		case 148:
4985 		case 228:
4986 		case 212:
4987 			block = "DB";
4988 			break;
4989 		case 37:
4990 		case 21:
4991 		case 101:
4992 		case 85:
4993 		case 165:
4994 		case 149:
4995 		case 229:
4996 		case 213:
4997 			block = "DB_HTILE";
4998 			break;
4999 		case 39:
5000 		case 23:
5001 		case 103:
5002 		case 87:
5003 		case 167:
5004 		case 151:
5005 		case 231:
5006 		case 215:
5007 			block = "DB_STEN";
5008 			break;
5009 		case 72:
5010 		case 68:
5011 		case 8:
5012 		case 4:
5013 		case 136:
5014 		case 132:
5015 		case 200:
5016 		case 196:
5017 			block = "TC";
5018 			break;
5019 		case 112:
5020 		case 48:
5021 			block = "CP";
5022 			break;
5023 		case 49:
5024 		case 177:
5025 		case 50:
5026 		case 178:
5027 			block = "SH";
5028 			break;
5029 		case 53:
5030 			block = "VGT";
5031 			break;
5032 		case 117:
5033 			block = "IH";
5034 			break;
5035 		case 51:
5036 		case 115:
5037 			block = "RLC";
5038 			break;
5039 		case 119:
5040 		case 183:
5041 			block = "DMA0";
5042 			break;
5043 		case 61:
5044 			block = "DMA1";
5045 			break;
5046 		case 248:
5047 		case 120:
5048 			block = "HDP";
5049 			break;
5050 		default:
5051 			block = "unknown";
5052 			break;
5053 		}
5054 	}
5055 
5056 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5057 	       protections, vmid, addr,
5058 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5059 	       block, mc_id);
5060 }
5061 
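/**
 * si_vm_flush - flush the VM TLB via the requested ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring pointer
 * @vm_id: VM context to flush
 * @pd_addr: address of the page directory
 *
 * Write the new page directory base for the VM context, flush the HDP
 * cache, request a TLB invalidate for that context and wait on
 * VM_INVALIDATE_REQUEST for the invalidate to complete, then sync the
 * PFP to the ME so stale PFP-side reads cannot slip past the flush
 * (SI).
 */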
5062 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5063 		 unsigned vm_id, uint64_t pd_addr)
5064 {
5065 	/* write new base address */
5066 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5067 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5068 				 WRITE_DATA_DST_SEL(0)));
5069 
5070 	if (vm_id < 8) {
5071 		radeon_ring_write(ring,
5072 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5073 	} else {
5074 		radeon_ring_write(ring,
5075 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5076 	}
5077 	radeon_ring_write(ring, 0);
5078 	radeon_ring_write(ring, pd_addr >> 12);
5079 
5080 	/* flush hdp cache */
5081 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5082 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5083 				 WRITE_DATA_DST_SEL(0)));
5084 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5085 	radeon_ring_write(ring, 0);
5086 	radeon_ring_write(ring, 0x1);
5087 
5088 	/* bits 0-15 are the VM contexts0-15 */
5089 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5090 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5091 				 WRITE_DATA_DST_SEL(0)));
5092 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5093 	radeon_ring_write(ring, 0);
5094 	radeon_ring_write(ring, 1 << vm_id);
5095 
5096 	/* wait for the invalidate to complete */
5097 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5098 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5099 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5100 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5101 	radeon_ring_write(ring, 0);
5102 	radeon_ring_write(ring, 0); /* ref */
5103 	radeon_ring_write(ring, 0); /* mask */
5104 	radeon_ring_write(ring, 0x20); /* poll interval */
5105 
5106 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5107 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5108 	radeon_ring_write(ring, 0x0);
5109 }
5110 
5111 /*
5112  *  Power and clock gating
5113  */
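/* Poll the two RLC serdes master busy registers until both read back
 * idle, giving up after rdev->usec_timeout microseconds for each.
 */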
5114 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5115 {
5116 	int i;
5117 
5118 	for (i = 0; i < rdev->usec_timeout; i++) {
5119 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5120 			break;
5121 		udelay(1);
5122 	}
5123 
5124 	for (i = 0; i < rdev->usec_timeout; i++) {
5125 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5126 			break;
5127 		udelay(1);
5128 	}
5129 }
5130 
5131 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5132 					 bool enable)
5133 {
5134 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5135 	u32 mask;
5136 	int i;
5137 
5138 	if (enable)
5139 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5140 	else
5141 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5142 	WREG32(CP_INT_CNTL_RING0, tmp);
5143 
5144 	if (!enable) {
5145 		/* read a gfx register */
5146 		tmp = RREG32(DB_DEPTH_INFO);
5147 
5148 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5149 		for (i = 0; i < rdev->usec_timeout; i++) {
5150 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5151 				break;
5152 			udelay(1);
5153 		}
5154 	}
5155 }
5156 
5157 static void si_set_uvd_dcm(struct radeon_device *rdev,
5158 			   bool sw_mode)
5159 {
5160 	u32 tmp, tmp2;
5161 
5162 	tmp = RREG32(UVD_CGC_CTRL);
5163 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5164 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5165 
5166 	if (sw_mode) {
5167 		tmp &= ~0x7ffff800;
5168 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5169 	} else {
5170 		tmp |= 0x7ffff800;
5171 		tmp2 = 0;
5172 	}
5173 
5174 	WREG32(UVD_CGC_CTRL, tmp);
5175 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5176 }
5177 
5178 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5179 {
5180 	bool hw_mode = true;
5181 
5182 	if (hw_mode) {
5183 		si_set_uvd_dcm(rdev, false);
5184 	} else {
5185 		u32 tmp = RREG32(UVD_CGC_CTRL);
5186 		tmp &= ~DCM;
5187 		WREG32(UVD_CGC_CTRL, tmp);
5188 	}
5189 }
5190 
5191 static u32 si_halt_rlc(struct radeon_device *rdev)
5192 {
5193 	u32 data, orig;
5194 
5195 	orig = data = RREG32(RLC_CNTL);
5196 
5197 	if (data & RLC_ENABLE) {
5198 		data &= ~RLC_ENABLE;
5199 		WREG32(RLC_CNTL, data);
5200 
5201 		si_wait_for_rlc_serdes(rdev);
5202 	}
5203 
5204 	return orig;
5205 }
5206 
5207 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5208 {
5209 	u32 tmp;
5210 
5211 	tmp = RREG32(RLC_CNTL);
5212 	if (tmp != rlc)
5213 		WREG32(RLC_CNTL, rlc);
5214 }
5215 
5216 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5217 {
5218 	u32 data, orig;
5219 
5220 	orig = data = RREG32(DMA_PG);
5221 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5222 		data |= PG_CNTL_ENABLE;
5223 	else
5224 		data &= ~PG_CNTL_ENABLE;
5225 	if (orig != data)
5226 		WREG32(DMA_PG, data);
5227 }
5228 
5229 static void si_init_dma_pg(struct radeon_device *rdev)
5230 {
5231 	u32 tmp;
5232 
5233 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5234 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5235 
5236 	for (tmp = 0; tmp < 5; tmp++)
5237 		WREG32(DMA_PGFSM_WRITE, 0);
5238 }
5239 
5240 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5241 			       bool enable)
5242 {
5243 	u32 tmp;
5244 
5245 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5246 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5247 		WREG32(RLC_TTOP_D, tmp);
5248 
5249 		tmp = RREG32(RLC_PG_CNTL);
5250 		tmp |= GFX_PG_ENABLE;
5251 		WREG32(RLC_PG_CNTL, tmp);
5252 
5253 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5254 		tmp |= AUTO_PG_EN;
5255 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5256 	} else {
5257 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5258 		tmp &= ~AUTO_PG_EN;
5259 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5260 
5261 		tmp = RREG32(DB_RENDER_CONTROL);
5262 	}
5263 }
5264 
5265 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5266 {
5267 	u32 tmp;
5268 
5269 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5270 
5271 	tmp = RREG32(RLC_PG_CNTL);
5272 	tmp |= GFX_PG_SRC;
5273 	WREG32(RLC_PG_CNTL, tmp);
5274 
5275 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5276 
5277 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5278 
5279 	tmp &= ~GRBM_REG_SGIT_MASK;
5280 	tmp |= GRBM_REG_SGIT(0x700);
5281 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5282 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5283 }
5284 
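/* Return a bitmap of the usable CUs in the given shader array: the
 * disabled-CU bits from the top half of CC_GC_SHADER_ARRAY_CONFIG are
 * merged with GC_USER_SHADER_ARRAY_CONFIG, shifted down, inverted and
 * masked to max_cu_per_sh bits, so a set bit means an active CU.
 */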
5285 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5286 {
5287 	u32 mask = 0, tmp, tmp1;
5288 	int i;
5289 
5290 	si_select_se_sh(rdev, se, sh);
5291 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5292 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5293 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5294 
5295 	tmp &= 0xffff0000;
5296 
5297 	tmp |= tmp1;
5298 	tmp >>= 16;
5299 
5300 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5301 		mask <<= 1;
5302 		mask |= 1;
5303 	}
5304 
5305 	return (~tmp) & mask;
5306 }
5307 
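/* Build the always-on CU mask used by RLC power gating: mark up to
 * two active CUs in every shader array as always-on, and program the
 * total number of active CUs into RLC_MAX_PG_CU.
 */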
5308 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5309 {
5310 	u32 i, j, k, active_cu_number = 0;
5311 	u32 mask, counter, cu_bitmap;
5312 	u32 tmp = 0;
5313 
5314 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5315 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5316 			mask = 1;
5317 			cu_bitmap = 0;
5318 			counter  = 0;
5319 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5320 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5321 					if (counter < 2)
5322 						cu_bitmap |= mask;
5323 					counter++;
5324 				}
5325 				mask <<= 1;
5326 			}
5327 
5328 			active_cu_number += counter;
5329 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5330 		}
5331 	}
5332 
5333 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5334 
5335 	tmp = RREG32(RLC_MAX_PG_CU);
5336 	tmp &= ~MAX_PU_CU_MASK;
5337 	tmp |= MAX_PU_CU(active_cu_number);
5338 	WREG32(RLC_MAX_PG_CU, tmp);
5339 }
5340 
5341 static void si_enable_cgcg(struct radeon_device *rdev,
5342 			   bool enable)
5343 {
5344 	u32 data, orig, tmp;
5345 
5346 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5347 
5348 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5349 		si_enable_gui_idle_interrupt(rdev, true);
5350 
5351 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5352 
5353 		tmp = si_halt_rlc(rdev);
5354 
5355 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5356 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5357 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5358 
5359 		si_wait_for_rlc_serdes(rdev);
5360 
5361 		si_update_rlc(rdev, tmp);
5362 
5363 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5364 
5365 		data |= CGCG_EN | CGLS_EN;
5366 	} else {
5367 		si_enable_gui_idle_interrupt(rdev, false);
5368 
5369 		RREG32(CB_CGTT_SCLK_CTRL);
5370 		RREG32(CB_CGTT_SCLK_CTRL);
5371 		RREG32(CB_CGTT_SCLK_CTRL);
5372 		RREG32(CB_CGTT_SCLK_CTRL);
5373 
5374 		data &= ~(CGCG_EN | CGLS_EN);
5375 	}
5376 
5377 	if (orig != data)
5378 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5379 }
5380 
5381 static void si_enable_mgcg(struct radeon_device *rdev,
5382 			   bool enable)
5383 {
5384 	u32 data, orig, tmp = 0;
5385 
5386 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5387 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5388 		data = 0x96940200;
5389 		if (orig != data)
5390 			WREG32(CGTS_SM_CTRL_REG, data);
5391 
5392 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5393 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5394 			data |= CP_MEM_LS_EN;
5395 			if (orig != data)
5396 				WREG32(CP_MEM_SLP_CNTL, data);
5397 		}
5398 
5399 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5400 		data &= 0xffffffc0;
5401 		if (orig != data)
5402 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5403 
5404 		tmp = si_halt_rlc(rdev);
5405 
5406 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5407 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5408 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5409 
5410 		si_update_rlc(rdev, tmp);
5411 	} else {
5412 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5413 		data |= 0x00000003;
5414 		if (orig != data)
5415 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5416 
5417 		data = RREG32(CP_MEM_SLP_CNTL);
5418 		if (data & CP_MEM_LS_EN) {
5419 			data &= ~CP_MEM_LS_EN;
5420 			WREG32(CP_MEM_SLP_CNTL, data);
5421 		}
5422 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5423 		data |= LS_OVERRIDE | OVERRIDE;
5424 		if (orig != data)
5425 			WREG32(CGTS_SM_CTRL_REG, data);
5426 
5427 		tmp = si_halt_rlc(rdev);
5428 
5429 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5430 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5431 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5432 
5433 		si_update_rlc(rdev, tmp);
5434 	}
5435 }
5436 
5437 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5438 			       bool enable)
5439 {
5440 	u32 orig, data, tmp;
5441 
5442 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5443 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5444 		tmp |= 0x3fff;
5445 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5446 
5447 		orig = data = RREG32(UVD_CGC_CTRL);
5448 		data |= DCM;
5449 		if (orig != data)
5450 			WREG32(UVD_CGC_CTRL, data);
5451 
5452 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5453 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5454 	} else {
5455 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5456 		tmp &= ~0x3fff;
5457 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5458 
5459 		orig = data = RREG32(UVD_CGC_CTRL);
5460 		data &= ~DCM;
5461 		if (orig != data)
5462 			WREG32(UVD_CGC_CTRL, data);
5463 
5464 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5465 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5466 	}
5467 }
5468 
5469 static const u32 mc_cg_registers[] =
5470 {
5471 	MC_HUB_MISC_HUB_CG,
5472 	MC_HUB_MISC_SIP_CG,
5473 	MC_HUB_MISC_VM_CG,
5474 	MC_XPB_CLK_GAT,
5475 	ATC_MISC_CG,
5476 	MC_CITF_MISC_WR_CG,
5477 	MC_CITF_MISC_RD_CG,
5478 	MC_CITF_MISC_VM_CG,
5479 	VM_L2_CG,
5480 };
5481 
5482 static void si_enable_mc_ls(struct radeon_device *rdev,
5483 			    bool enable)
5484 {
5485 	int i;
5486 	u32 orig, data;
5487 
5488 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5489 		orig = data = RREG32(mc_cg_registers[i]);
5490 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5491 			data |= MC_LS_ENABLE;
5492 		else
5493 			data &= ~MC_LS_ENABLE;
5494 		if (data != orig)
5495 			WREG32(mc_cg_registers[i], data);
5496 	}
5497 }
5498 
5499 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5500 			       bool enable)
5501 {
5502 	int i;
5503 	u32 orig, data;
5504 
5505 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5506 		orig = data = RREG32(mc_cg_registers[i]);
5507 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5508 			data |= MC_CG_ENABLE;
5509 		else
5510 			data &= ~MC_CG_ENABLE;
5511 		if (data != orig)
5512 			WREG32(mc_cg_registers[i], data);
5513 	}
5514 }
5515 
5516 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5517 			       bool enable)
5518 {
5519 	u32 orig, data, offset;
5520 	int i;
5521 
5522 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5523 		for (i = 0; i < 2; i++) {
5524 			if (i == 0)
5525 				offset = DMA0_REGISTER_OFFSET;
5526 			else
5527 				offset = DMA1_REGISTER_OFFSET;
5528 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5529 			data &= ~MEM_POWER_OVERRIDE;
5530 			if (data != orig)
5531 				WREG32(DMA_POWER_CNTL + offset, data);
5532 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5533 		}
5534 	} else {
5535 		for (i = 0; i < 2; i++) {
5536 			if (i == 0)
5537 				offset = DMA0_REGISTER_OFFSET;
5538 			else
5539 				offset = DMA1_REGISTER_OFFSET;
5540 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5541 			data |= MEM_POWER_OVERRIDE;
5542 			if (data != orig)
5543 				WREG32(DMA_POWER_CNTL + offset, data);
5544 
5545 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5546 			data = 0xff000000;
5547 			if (data != orig)
5548 				WREG32(DMA_CLK_CTRL + offset, data);
5549 		}
5550 	}
5551 }
5552 
5553 static void si_enable_bif_mgls(struct radeon_device *rdev,
5554 			       bool enable)
5555 {
5556 	u32 orig, data;
5557 
5558 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5559 
5560 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5561 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5562 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5563 	else
5564 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5565 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5566 
5567 	if (orig != data)
5568 		WREG32_PCIE(PCIE_CNTL2, data);
5569 }
5570 
5571 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5572 			       bool enable)
5573 {
5574 	u32 orig, data;
5575 
5576 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5577 
5578 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5579 		data &= ~CLOCK_GATING_DIS;
5580 	else
5581 		data |= CLOCK_GATING_DIS;
5582 
5583 	if (orig != data)
5584 		WREG32(HDP_HOST_PATH_CNTL, data);
5585 }
5586 
5587 static void si_enable_hdp_ls(struct radeon_device *rdev,
5588 			     bool enable)
5589 {
5590 	u32 orig, data;
5591 
5592 	orig = data = RREG32(HDP_MEM_POWER_LS);
5593 
5594 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5595 		data |= HDP_LS_ENABLE;
5596 	else
5597 		data &= ~HDP_LS_ENABLE;
5598 
5599 	if (orig != data)
5600 		WREG32(HDP_MEM_POWER_LS, data);
5601 }
5602 
5603 static void si_update_cg(struct radeon_device *rdev,
5604 			 u32 block, bool enable)
5605 {
5606 	if (block & RADEON_CG_BLOCK_GFX) {
5607 		si_enable_gui_idle_interrupt(rdev, false);
5608 		/* order matters! */
5609 		if (enable) {
5610 			si_enable_mgcg(rdev, true);
5611 			si_enable_cgcg(rdev, true);
5612 		} else {
5613 			si_enable_cgcg(rdev, false);
5614 			si_enable_mgcg(rdev, false);
5615 		}
5616 		si_enable_gui_idle_interrupt(rdev, true);
5617 	}
5618 
5619 	if (block & RADEON_CG_BLOCK_MC) {
5620 		si_enable_mc_mgcg(rdev, enable);
5621 		si_enable_mc_ls(rdev, enable);
5622 	}
5623 
5624 	if (block & RADEON_CG_BLOCK_SDMA) {
5625 		si_enable_dma_mgcg(rdev, enable);
5626 	}
5627 
5628 	if (block & RADEON_CG_BLOCK_BIF) {
5629 		si_enable_bif_mgls(rdev, enable);
5630 	}
5631 
5632 	if (block & RADEON_CG_BLOCK_UVD) {
5633 		if (rdev->has_uvd) {
5634 			si_enable_uvd_mgcg(rdev, enable);
5635 		}
5636 	}
5637 
5638 	if (block & RADEON_CG_BLOCK_HDP) {
5639 		si_enable_hdp_mgcg(rdev, enable);
5640 		si_enable_hdp_ls(rdev, enable);
5641 	}
5642 }
5643 
5644 static void si_init_cg(struct radeon_device *rdev)
5645 {
5646 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5647 			    RADEON_CG_BLOCK_MC |
5648 			    RADEON_CG_BLOCK_SDMA |
5649 			    RADEON_CG_BLOCK_BIF |
5650 			    RADEON_CG_BLOCK_HDP), true);
5651 	if (rdev->has_uvd) {
5652 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5653 		si_init_uvd_internal_cg(rdev);
5654 	}
5655 }
5656 
5657 static void si_fini_cg(struct radeon_device *rdev)
5658 {
5659 	if (rdev->has_uvd) {
5660 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5661 	}
5662 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5663 			    RADEON_CG_BLOCK_MC |
5664 			    RADEON_CG_BLOCK_SDMA |
5665 			    RADEON_CG_BLOCK_BIF |
5666 			    RADEON_CG_BLOCK_HDP), false);
5667 }
5668 
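/**
 * si_get_csb_size - get the size of the clear state buffer in dwords
 *
 * @rdev: radeon_device pointer
 *
 * The clear state buffer emitted by si_get_csb_buffer() consists of a
 * preamble begin (2 dwords), a CONTEXT_CONTROL packet (3 dwords), one
 * SET_CONTEXT_REG packet per SECT_CONTEXT extent (2 + reg_count dwords
 * each), a PA_SC_RASTER_CONFIG write (3 dwords), a preamble end
 * (2 dwords) and a CLEAR_STATE packet (2 dwords).
 */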
5669 u32 si_get_csb_size(struct radeon_device *rdev)
5670 {
5671 	u32 count = 0;
5672 	const struct cs_section_def *sect = NULL;
5673 	const struct cs_extent_def *ext = NULL;
5674 
5675 	if (rdev->rlc.cs_data == NULL)
5676 		return 0;
5677 
5678 	/* begin clear state */
5679 	count += 2;
5680 	/* context control state */
5681 	count += 3;
5682 
5683 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5684 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5685 			if (sect->id == SECT_CONTEXT)
5686 				count += 2 + ext->reg_count;
5687 			else
5688 				return 0;
5689 		}
5690 	}
5691 	/* pa_sc_raster_config */
5692 	count += 3;
5693 	/* end clear state */
5694 	count += 2;
5695 	/* clear state */
5696 	count += 2;
5697 
5698 	return count;
5699 }
5700 
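/**
 * si_get_csb_buffer - fill in the clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer, at least si_get_csb_size() dwords
 *
 * Emit the PM4 packets described above into @buffer in little-endian
 * order, using a family-specific PA_SC_RASTER_CONFIG value.
 */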
5701 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5702 {
5703 	u32 count = 0, i;
5704 	const struct cs_section_def *sect = NULL;
5705 	const struct cs_extent_def *ext = NULL;
5706 
5707 	if (rdev->rlc.cs_data == NULL)
5708 		return;
5709 	if (buffer == NULL)
5710 		return;
5711 
5712 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5713 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5714 
5715 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5716 	buffer[count++] = cpu_to_le32(0x80000000);
5717 	buffer[count++] = cpu_to_le32(0x80000000);
5718 
5719 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5720 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5721 			if (sect->id == SECT_CONTEXT) {
5722 				buffer[count++] =
5723 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5724 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5725 				for (i = 0; i < ext->reg_count; i++)
5726 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5727 			} else {
5728 				return;
5729 			}
5730 		}
5731 	}
5732 
5733 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5734 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5735 	switch (rdev->family) {
5736 	case CHIP_TAHITI:
5737 	case CHIP_PITCAIRN:
5738 		buffer[count++] = cpu_to_le32(0x2a00126a);
5739 		break;
5740 	case CHIP_VERDE:
5741 		buffer[count++] = cpu_to_le32(0x0000124a);
5742 		break;
5743 	case CHIP_OLAND:
5744 		buffer[count++] = cpu_to_le32(0x00000082);
5745 		break;
5746 	case CHIP_HAINAN:
5747 		buffer[count++] = cpu_to_le32(0x00000000);
5748 		break;
5749 	default:
5750 		buffer[count++] = cpu_to_le32(0x00000000);
5751 		break;
5752 	}
5753 
5754 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5755 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5756 
5757 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5758 	buffer[count++] = cpu_to_le32(0);
5759 }
5760 
5761 static void si_init_pg(struct radeon_device *rdev)
5762 {
5763 	if (rdev->pg_flags) {
5764 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5765 			si_init_dma_pg(rdev);
5766 		}
5767 		si_init_ao_cu_mask(rdev);
5768 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5769 			si_init_gfx_cgpg(rdev);
5770 		} else {
5771 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5772 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5773 		}
5774 		si_enable_dma_pg(rdev, true);
5775 		si_enable_gfx_cgpg(rdev, true);
5776 	} else {
5777 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5778 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5779 	}
5780 }
5781 
5782 static void si_fini_pg(struct radeon_device *rdev)
5783 {
5784 	if (rdev->pg_flags) {
5785 		si_enable_dma_pg(rdev, false);
5786 		si_enable_gfx_cgpg(rdev, false);
5787 	}
5788 }
5789 
5790 /*
5791  * RLC
5792  */
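/* Assert the RLC soft-reset bit in GRBM_SOFT_RESET, wait 50us,
 * deassert it and wait another 50us for the block to settle.
 */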
5793 void si_rlc_reset(struct radeon_device *rdev)
5794 {
5795 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5796 
5797 	tmp |= SOFT_RESET_RLC;
5798 	WREG32(GRBM_SOFT_RESET, tmp);
5799 	udelay(50);
5800 	tmp &= ~SOFT_RESET_RLC;
5801 	WREG32(GRBM_SOFT_RESET, tmp);
5802 	udelay(50);
5803 }
5804 
5805 static void si_rlc_stop(struct radeon_device *rdev)
5806 {
5807 	WREG32(RLC_CNTL, 0);
5808 
5809 	si_enable_gui_idle_interrupt(rdev, false);
5810 
5811 	si_wait_for_rlc_serdes(rdev);
5812 }
5813 
5814 static void si_rlc_start(struct radeon_device *rdev)
5815 {
5816 	WREG32(RLC_CNTL, RLC_ENABLE);
5817 
5818 	si_enable_gui_idle_interrupt(rdev, true);
5819 
5820 	udelay(50);
5821 }
5822 
5823 static bool si_lbpw_supported(struct radeon_device *rdev)
5824 {
5825 	u32 tmp;
5826 
5827 	/* Enable LBPW only for DDR3 */
5828 	tmp = RREG32(MC_SEQ_MISC0);
5829 	if ((tmp & 0xF0000000) == 0xB0000000)
5830 		return true;
5831 	return false;
5832 }
5833 
5834 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5835 {
5836 	u32 tmp;
5837 
5838 	tmp = RREG32(RLC_LB_CNTL);
5839 	if (enable)
5840 		tmp |= LOAD_BALANCE_ENABLE;
5841 	else
5842 		tmp &= ~LOAD_BALANCE_ENABLE;
5843 	WREG32(RLC_LB_CNTL, tmp);
5844 
5845 	if (!enable) {
5846 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5847 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5848 	}
5849 }
5850 
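/* Stop and reset the RLC, reinitialize power and clock gating, upload
 * the RLC microcode one dword at a time (using the header-described
 * layout for new-style firmware, the fixed SI_RLC_UCODE_SIZE layout
 * otherwise), then restart the RLC with load balancing enabled when
 * the memory type supports it.
 */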
5851 static int si_rlc_resume(struct radeon_device *rdev)
5852 {
5853 	u32 i;
5854 
5855 	if (!rdev->rlc_fw)
5856 		return -EINVAL;
5857 
5858 	si_rlc_stop(rdev);
5859 
5860 	si_rlc_reset(rdev);
5861 
5862 	si_init_pg(rdev);
5863 
5864 	si_init_cg(rdev);
5865 
5866 	WREG32(RLC_RL_BASE, 0);
5867 	WREG32(RLC_RL_SIZE, 0);
5868 	WREG32(RLC_LB_CNTL, 0);
5869 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5870 	WREG32(RLC_LB_CNTR_INIT, 0);
5871 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5872 
5873 	WREG32(RLC_MC_CNTL, 0);
5874 	WREG32(RLC_UCODE_CNTL, 0);
5875 
5876 	if (rdev->new_fw) {
5877 		const struct rlc_firmware_header_v1_0 *hdr =
5878 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5879 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5880 		const __le32 *fw_data = (const __le32 *)
5881 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5882 
5883 		radeon_ucode_print_rlc_hdr(&hdr->header);
5884 
5885 		for (i = 0; i < fw_size; i++) {
5886 			WREG32(RLC_UCODE_ADDR, i);
5887 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5888 		}
5889 	} else {
5890 		const __be32 *fw_data =
5891 			(const __be32 *)rdev->rlc_fw->data;
5892 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5893 			WREG32(RLC_UCODE_ADDR, i);
5894 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5895 		}
5896 	}
5897 	WREG32(RLC_UCODE_ADDR, 0);
5898 
5899 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5900 
5901 	si_rlc_start(rdev);
5902 
5903 	return 0;
5904 }
5905 
5906 static void si_enable_interrupts(struct radeon_device *rdev)
5907 {
5908 	u32 ih_cntl = RREG32(IH_CNTL);
5909 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5910 
5911 	ih_cntl |= ENABLE_INTR;
5912 	ih_rb_cntl |= IH_RB_ENABLE;
5913 	WREG32(IH_CNTL, ih_cntl);
5914 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5915 	rdev->ih.enabled = true;
5916 }
5917 
5918 static void si_disable_interrupts(struct radeon_device *rdev)
5919 {
5920 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5921 	u32 ih_cntl = RREG32(IH_CNTL);
5922 
5923 	ih_rb_cntl &= ~IH_RB_ENABLE;
5924 	ih_cntl &= ~ENABLE_INTR;
5925 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5926 	WREG32(IH_CNTL, ih_cntl);
5927 	/* set rptr, wptr to 0 */
5928 	WREG32(IH_RB_RPTR, 0);
5929 	WREG32(IH_RB_WPTR, 0);
5930 	rdev->ih.enabled = false;
5931 	rdev->ih.rptr = 0;
5932 }
5933 
5934 static void si_disable_interrupt_state(struct radeon_device *rdev)
5935 {
5936 	int i;
5937 	u32 tmp;
5938 
5939 	tmp = RREG32(CP_INT_CNTL_RING0) &
5940 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5941 	WREG32(CP_INT_CNTL_RING0, tmp);
5942 	WREG32(CP_INT_CNTL_RING1, 0);
5943 	WREG32(CP_INT_CNTL_RING2, 0);
5944 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5945 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5946 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5947 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5948 	WREG32(GRBM_INT_CNTL, 0);
5949 	WREG32(SRBM_INT_CNTL, 0);
5950 	for (i = 0; i < rdev->num_crtc; i++)
5951 		WREG32(INT_MASK + crtc_offsets[i], 0);
5952 	for (i = 0; i < rdev->num_crtc; i++)
5953 		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], 0);
5954 
5955 	if (!ASIC_IS_NODCE(rdev)) {
5956 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5957 
5958 		for (i = 0; i < 6; i++)
5959 			WREG32_AND(DC_HPDx_INT_CONTROL(i),
5960 				   DC_HPDx_INT_POLARITY);
5961 	}
5962 }
5963 
5964 static int si_irq_init(struct radeon_device *rdev)
5965 {
5966 	int ret = 0;
5967 	int rb_bufsz;
5968 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5969 
5970 	/* allocate ring */
5971 	ret = r600_ih_ring_alloc(rdev);
5972 	if (ret)
5973 		return ret;
5974 
5975 	/* disable irqs */
5976 	si_disable_interrupts(rdev);
5977 
5978 	/* init rlc */
5979 	ret = si_rlc_resume(rdev);
5980 	if (ret) {
5981 		r600_ih_ring_fini(rdev);
5982 		return ret;
5983 	}
5984 
5985 	/* setup interrupt control */
5986 	/* set dummy read address to ring address */
5987 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5988 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
5989 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5990 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5991 	 */
5992 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5993 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5994 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5995 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
5996 
5997 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5998 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
5999 
6000 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6001 		      IH_WPTR_OVERFLOW_CLEAR |
6002 		      (rb_bufsz << 1));
6003 
6004 	if (rdev->wb.enabled)
6005 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6006 
6007 	/* set the writeback address whether it's enabled or not */
6008 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6009 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6010 
6011 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6012 
6013 	/* set rptr, wptr to 0 */
6014 	WREG32(IH_RB_RPTR, 0);
6015 	WREG32(IH_RB_WPTR, 0);
6016 
6017 	/* Default settings for IH_CNTL (disabled at first) */
6018 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6019 	/* RPTR_REARM only works if msi's are enabled */
6020 	if (rdev->msi_enabled)
6021 		ih_cntl |= RPTR_REARM;
6022 	WREG32(IH_CNTL, ih_cntl);
6023 
6024 	/* force the active interrupt state to all disabled */
6025 	si_disable_interrupt_state(rdev);
6026 
6027 	pci_set_master(rdev->pdev);
6028 
6029 	/* enable irqs */
6030 	si_enable_interrupts(rdev);
6031 
6032 	return ret;
6033 }
6034 
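/**
 * si_irq_set - program the interrupt enables
 *
 * @rdev: radeon_device pointer
 *
 * Build the CP, DMA, thermal, vblank/pflip and hotplug interrupt
 * enables from the driver's current irq state and write them out,
 * finishing with a posting read of SRBM_STATUS.  Returns -EINVAL if
 * no irq handler is installed, and 0 otherwise.
 */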
6035 /* The order we write back each register here is important */
6036 int si_irq_set(struct radeon_device *rdev)
6037 {
6038 	int i;
6039 	u32 cp_int_cntl;
6040 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6041 	u32 grbm_int_cntl = 0;
6042 	u32 dma_cntl, dma_cntl1;
6043 	u32 thermal_int = 0;
6044 
6045 	if (!rdev->irq.installed) {
6046 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6047 		return -EINVAL;
6048 	}
6049 	/* don't enable anything if the ih is disabled */
6050 	if (!rdev->ih.enabled) {
6051 		si_disable_interrupts(rdev);
6052 		/* force the active interrupt state to all disabled */
6053 		si_disable_interrupt_state(rdev);
6054 		return 0;
6055 	}
6056 
6057 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6058 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6059 
6060 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6061 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6062 
6063 	thermal_int = RREG32(CG_THERMAL_INT) &
6064 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6065 
6066 	/* enable CP interrupts on all rings */
6067 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6068 		DRM_DEBUG("si_irq_set: sw int gfx\n");
6069 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6070 	}
6071 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6072 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6073 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6074 	}
6075 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6076 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6077 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6078 	}
6079 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6080 		DRM_DEBUG("si_irq_set: sw int dma\n");
6081 		dma_cntl |= TRAP_ENABLE;
6082 	}
6083 
6084 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6085 		DRM_DEBUG("si_irq_set: sw int dma1\n");
6086 		dma_cntl1 |= TRAP_ENABLE;
6087 	}
6088 
6089 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6090 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6091 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6092 
6093 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6094 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6095 
6096 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6097 
6098 	if (rdev->irq.dpm_thermal) {
6099 		DRM_DEBUG("dpm thermal\n");
6100 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6101 	}
6102 
6103 	for (i = 0; i < rdev->num_crtc; i++) {
6104 		radeon_irq_kms_set_irq_n_enabled(
6105 		    rdev, INT_MASK + crtc_offsets[i], VBLANK_INT_MASK,
6106 		    rdev->irq.crtc_vblank_int[i] ||
6107 		    atomic_read(&rdev->irq.pflip[i]), "vblank", i);
6108 	}
6109 
6110 	for (i = 0; i < rdev->num_crtc; i++)
6111 		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], GRPH_PFLIP_INT_MASK);
6112 
6113 	if (!ASIC_IS_NODCE(rdev)) {
6114 		for (i = 0; i < 6; i++) {
6115 			radeon_irq_kms_set_irq_n_enabled(
6116 			    rdev, DC_HPDx_INT_CONTROL(i),
6117 			    DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN,
6118 			    rdev->irq.hpd[i], "HPD", i);
6119 		}
6120 	}
6121 
6122 	WREG32(CG_THERMAL_INT, thermal_int);
6123 
6124 	/* posting read */
6125 	RREG32(SRBM_STATUS);
6126 
6127 	return 0;
6128 }
6129 
6130 /* The order we write back each register here is important */
6131 static inline void si_irq_ack(struct radeon_device *rdev)
6132 {
6133 	int i, j;
6134 	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6135 	u32 *grph_int = rdev->irq.stat_regs.evergreen.grph_int;
6136 
6137 	if (ASIC_IS_NODCE(rdev))
6138 		return;
6139 
6140 	for (i = 0; i < 6; i++) {
6141 		disp_int[i] = RREG32(si_disp_int_status[i]);
6142 		if (i < rdev->num_crtc)
6143 			grph_int[i] = RREG32(GRPH_INT_STATUS + crtc_offsets[i]);
6144 	}
6145 
6146 	/* We write back each interrupt register in pairs of two */
6147 	for (i = 0; i < rdev->num_crtc; i += 2) {
6148 		for (j = i; j < (i + 2); j++) {
6149 			if (grph_int[j] & GRPH_PFLIP_INT_OCCURRED)
6150 				WREG32(GRPH_INT_STATUS + crtc_offsets[j],
6151 				       GRPH_PFLIP_INT_CLEAR);
6152 		}
6153 
6154 		for (j = i; j < (i + 2); j++) {
6155 			if (disp_int[j] & LB_D1_VBLANK_INTERRUPT)
6156 				WREG32(VBLANK_STATUS + crtc_offsets[j],
6157 				       VBLANK_ACK);
6158 			if (disp_int[j] & LB_D1_VLINE_INTERRUPT)
6159 				WREG32(VLINE_STATUS + crtc_offsets[j],
6160 				       VLINE_ACK);
6161 		}
6162 	}
6163 
6164 	for (i = 0; i < 6; i++) {
6165 		if (disp_int[i] & DC_HPD1_INTERRUPT)
6166 			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_INT_ACK);
6167 	}
6168 
6169 	for (i = 0; i < 6; i++) {
6170 		if (disp_int[i] & DC_HPD1_RX_INTERRUPT)
6171 			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_RX_INT_ACK);
6172 	}
6173 }
6174 
6175 static void si_irq_disable(struct radeon_device *rdev)
6176 {
6177 	si_disable_interrupts(rdev);
6178 	/* Wait and acknowledge irq */
6179 	mdelay(1);
6180 	si_irq_ack(rdev);
6181 	si_disable_interrupt_state(rdev);
6182 }
6183 
6184 static void si_irq_suspend(struct radeon_device *rdev)
6185 {
6186 	si_irq_disable(rdev);
6187 	si_rlc_stop(rdev);
6188 }
6189 
6190 static void si_irq_fini(struct radeon_device *rdev)
6191 {
6192 	si_irq_suspend(rdev);
6193 	r600_ih_ring_fini(rdev);
6194 }
6195 
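/* Fetch the current IH write pointer, preferring the writeback copy
 * when writeback is enabled, and recover from a ring overflow by
 * clearing the overflow flag and advancing the read pointer past the
 * oldest entry that was not overwritten.
 */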
6196 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6197 {
6198 	u32 wptr, tmp;
6199 
6200 	if (rdev->wb.enabled)
6201 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6202 	else
6203 		wptr = RREG32(IH_RB_WPTR);
6204 
6205 	if (wptr & RB_OVERFLOW) {
6206 		wptr &= ~RB_OVERFLOW;
6207 		/* When a ring buffer overflow happens, start parsing interrupts
6208 		 * from the last vector that was not overwritten (wptr + 16).
6209 		 * Hopefully this allows us to catch up.
6210 		 */
6211 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6212 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6213 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6214 		tmp = RREG32(IH_RB_CNTL);
6215 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6216 		WREG32(IH_RB_CNTL, tmp);
6217 	}
6218 	return (wptr & rdev->ih.ptr_mask);
6219 }
6220 
6221 /*        SI IV Ring
6222  * Each IV ring entry is 128 bits:
6223  * [7:0]    - interrupt source id
6224  * [31:8]   - reserved
6225  * [59:32]  - interrupt source data
6226  * [63:60]  - reserved
6227  * [71:64]  - RINGID
6228  * [79:72]  - VMID
6229  * [127:80] - reserved
6230  */
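/**
 * si_irq_process - drain and dispatch the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Acknowledge the display interrupts, then walk the IV ring from rptr
 * to wptr dispatching each entry by source id (vblank/vline, page
 * flip, hotplug, VM fault, CP/DMA events, thermal).  Deferred work
 * (DP, hotplug, thermal) is scheduled once the loop finishes, and the
 * wptr is re-read afterwards in case new entries arrived while
 * processing.
 */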
6231 irqreturn_t si_irq_process(struct radeon_device *rdev)
6232 {
6233 	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6234 	u32 crtc_idx, hpd_idx;
6235 	u32 mask;
6236 	u32 wptr;
6237 	u32 rptr;
6238 	u32 src_id, src_data, ring_id;
6239 	u32 ring_index;
6240 	bool queue_hotplug = false;
6241 	bool queue_dp = false;
6242 	bool queue_thermal = false;
6243 	u32 status, addr;
6244 	const char *event_name;
6245 
6246 	if (!rdev->ih.enabled || rdev->shutdown)
6247 		return IRQ_NONE;
6248 
6249 	wptr = si_get_ih_wptr(rdev);
6250 
6251 restart_ih:
6252 	/* is somebody else already processing irqs? */
6253 	if (atomic_xchg(&rdev->ih.lock, 1))
6254 		return IRQ_NONE;
6255 
6256 	rptr = rdev->ih.rptr;
6257 	DRM_DEBUG_VBLANK("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6258 
6259 	/* Order reading of wptr vs. reading of IH ring data */
6260 	rmb();
6261 
6262 	/* display interrupts */
6263 	si_irq_ack(rdev);
6264 
6265 	while (rptr != wptr) {
6266 		/* wptr/rptr are in bytes! */
6267 		ring_index = rptr / 4;
6268 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6269 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6270 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6271 
6272 		switch (src_id) {
6273 		case 1: /* D1 vblank/vline */
6274 		case 2: /* D2 vblank/vline */
6275 		case 3: /* D3 vblank/vline */
6276 		case 4: /* D4 vblank/vline */
6277 		case 5: /* D5 vblank/vline */
6278 		case 6: /* D6 vblank/vline */
6279 			crtc_idx = src_id - 1;
6280 
6281 			if (src_data == 0) { /* vblank */
6282 				mask = LB_D1_VBLANK_INTERRUPT;
6283 				event_name = "vblank";
6284 
6285 				if (rdev->irq.crtc_vblank_int[crtc_idx]) {
6286 					drm_handle_vblank(rdev->ddev, crtc_idx);
6287 					rdev->pm.vblank_sync = true;
6288 					wake_up(&rdev->irq.vblank_queue);
6289 				}
6290 				if (atomic_read(&rdev->irq.pflip[crtc_idx])) {
6291 					radeon_crtc_handle_vblank(rdev,
6292 								  crtc_idx);
6293 				}
6294 
6295 			} else if (src_data == 1) { /* vline */
6296 				mask = LB_D1_VLINE_INTERRUPT;
6297 				event_name = "vline";
6298 			} else {
6299 				DRM_DEBUG("Unhandled interrupt: %d %d\n",
6300 					  src_id, src_data);
6301 				break;
6302 			}
6303 
6304 			if (!(disp_int[crtc_idx] & mask)) {
6305 				DRM_DEBUG("IH: D%d %s - IH event w/o asserted irq bit?\n",
6306 					  crtc_idx + 1, event_name);
6307 			}
6308 
6309 			disp_int[crtc_idx] &= ~mask;
6310 			DRM_DEBUG("IH: D%d %s\n", crtc_idx + 1, event_name);
6311 
6312 			break;
6313 		case 8: /* D1 page flip */
6314 		case 10: /* D2 page flip */
6315 		case 12: /* D3 page flip */
6316 		case 14: /* D4 page flip */
6317 		case 16: /* D5 page flip */
6318 		case 18: /* D6 page flip */
6319 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6320 			if (radeon_use_pflipirq > 0)
6321 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6322 			break;
6323 		case 42: /* HPD hotplug */
6324 			if (src_data <= 5) {
6325 				hpd_idx = src_data;
6326 				mask = DC_HPD1_INTERRUPT;
6327 				queue_hotplug = true;
6328 				event_name = "HPD";
6329 
6330 			} else if (src_data <= 11) {
6331 				hpd_idx = src_data - 6;
6332 				mask = DC_HPD1_RX_INTERRUPT;
6333 				queue_dp = true;
6334 				event_name = "HPD_RX";
6335 
6336 			} else {
6337 				DRM_DEBUG("Unhandled interrupt: %d %d\n",
6338 					  src_id, src_data);
6339 				break;
6340 			}
6341 
6342 			if (!(disp_int[hpd_idx] & mask))
6343 				DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6344 
6345 			disp_int[hpd_idx] &= ~mask;
6346 			DRM_DEBUG("IH: %s%d\n", event_name, hpd_idx + 1);
6347 			break;
6348 		case 96:
6349 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6350 			WREG32(SRBM_INT_ACK, 0x1);
6351 			break;
6352 		case 124: /* UVD */
6353 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6354 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6355 			break;
6356 		case 146:
6357 		case 147:
6358 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6359 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6360 			/* reset addr and status */
6361 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6362 			if (addr == 0x0 && status == 0x0)
6363 				break;
6364 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6365 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6366 				addr);
6367 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6368 				status);
6369 			si_vm_decode_fault(rdev, status, addr);
6370 			break;
6371 		case 176: /* RINGID0 CP_INT */
6372 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6373 			break;
6374 		case 177: /* RINGID1 CP_INT */
6375 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6376 			break;
6377 		case 178: /* RINGID2 CP_INT */
6378 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6379 			break;
6380 		case 181: /* CP EOP event */
6381 			DRM_DEBUG("IH: CP EOP\n");
6382 			switch (ring_id) {
6383 			case 0:
6384 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6385 				break;
6386 			case 1:
6387 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6388 				break;
6389 			case 2:
6390 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6391 				break;
6392 			}
6393 			break;
6394 		case 224: /* DMA trap event */
6395 			DRM_DEBUG("IH: DMA trap\n");
6396 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6397 			break;
6398 		case 230: /* thermal low to high */
6399 			DRM_DEBUG("IH: thermal low to high\n");
6400 			rdev->pm.dpm.thermal.high_to_low = false;
6401 			queue_thermal = true;
6402 			break;
6403 		case 231: /* thermal high to low */
6404 			DRM_DEBUG("IH: thermal high to low\n");
6405 			rdev->pm.dpm.thermal.high_to_low = true;
6406 			queue_thermal = true;
6407 			break;
6408 		case 233: /* GUI IDLE */
6409 			DRM_DEBUG("IH: GUI idle\n");
6410 			break;
6411 		case 244: /* DMA trap event */
6412 			DRM_DEBUG("IH: DMA1 trap\n");
6413 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6414 			break;
6415 		default:
6416 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6417 			break;
6418 		}
6419 
6420 		/* wptr/rptr are in bytes! */
6421 		rptr += 16;
6422 		rptr &= rdev->ih.ptr_mask;
6423 		WREG32(IH_RB_RPTR, rptr);
6424 	}
6425 	if (queue_dp)
6426 		schedule_work(&rdev->dp_work);
6427 	if (queue_hotplug)
6428 		schedule_delayed_work(&rdev->hotplug_work, 0);
6429 	if (queue_thermal && rdev->pm.dpm_enabled)
6430 		schedule_work(&rdev->pm.dpm.thermal.work);
6431 	rdev->ih.rptr = rptr;
6432 	atomic_set(&rdev->ih.lock, 0);
6433 
6434 	/* make sure wptr hasn't changed while processing */
6435 	wptr = si_get_ih_wptr(rdev);
6436 	if (wptr != rptr)
6437 		goto restart_ih;
6438 
6439 	return IRQ_HANDLED;
6440 }
6441 
6442 /*
6443  * startup/shutdown callbacks
6444  */
6445 static void si_uvd_init(struct radeon_device *rdev)
6446 {
6447 	int r;
6448 
6449 	if (!rdev->has_uvd)
6450 		return;
6451 
6452 	r = radeon_uvd_init(rdev);
6453 	if (r) {
6454 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6455 		/*
6456 		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
6457 		 * uvd_v2_2_resume() fail early, so nothing happens there.
6458 		 * Going through that code is therefore pointless, which is
6459 		 * why we disable uvd here.
6460 		 */
6461 		rdev->has_uvd = 0;
6462 		return;
6463 	}
6464 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6465 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6466 }
6467 
6468 static void si_uvd_start(struct radeon_device *rdev)
6469 {
6470 	int r;
6471 
6472 	if (!rdev->has_uvd)
6473 		return;
6474 
6475 	r = uvd_v2_2_resume(rdev);
6476 	if (r) {
6477 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6478 		goto error;
6479 	}
6480 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6481 	if (r) {
6482 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6483 		goto error;
6484 	}
6485 	return;
6486 
6487 error:
6488 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6489 }
6490 
6491 static void si_uvd_resume(struct radeon_device *rdev)
6492 {
6493 	struct radeon_ring *ring;
6494 	int r;
6495 
6496 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6497 		return;
6498 
6499 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6500 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6501 	if (r) {
6502 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6503 		return;
6504 	}
6505 	r = uvd_v1_0_init(rdev);
6506 	if (r) {
6507 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6508 		return;
6509 	}
6510 }
6511 
6512 static void si_vce_init(struct radeon_device *rdev)
6513 {
6514 	int r;
6515 
6516 	if (!rdev->has_vce)
6517 		return;
6518 
6519 	r = radeon_vce_init(rdev);
6520 	if (r) {
6521 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6522 		/*
6523 		 * At this point rdev->vce.vcpu_bo is NULL, which makes
6524 		 * si_vce_start() fail early, so nothing happens there.
6525 		 * Going through that code is therefore pointless, which is
6526 		 * why we disable vce here.
6527 		 */
6528 		rdev->has_vce = 0;
6529 		return;
6530 	}
6531 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6532 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6533 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6534 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6535 }
6536 
6537 static void si_vce_start(struct radeon_device *rdev)
6538 {
6539 	int r;
6540 
6541 	if (!rdev->has_vce)
6542 		return;
6543 
6544 	r = radeon_vce_resume(rdev);
6545 	if (r) {
6546 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6547 		goto error;
6548 	}
6549 	r = vce_v1_0_resume(rdev);
6550 	if (r) {
6551 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6552 		goto error;
6553 	}
6554 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6555 	if (r) {
6556 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6557 		goto error;
6558 	}
6559 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6560 	if (r) {
6561 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6562 		goto error;
6563 	}
6564 	return;
6565 
6566 error:
6567 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6568 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6569 }
6570 
6571 static void si_vce_resume(struct radeon_device *rdev)
6572 {
6573 	struct radeon_ring *ring;
6574 	int r;
6575 
6576 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6577 		return;
6578 
6579 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6580 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6581 	if (r) {
6582 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6583 		return;
6584 	}
6585 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6586 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6587 	if (r) {
6588 		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
6589 		return;
6590 	}
6591 	r = vce_v1_0_init(rdev);
6592 	if (r) {
6593 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6594 		return;
6595 	}
6596 }
6597 
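/* Bring the ASIC up: enable the PCIE link and ASPM, program the
 * memory controller (loading the MC firmware first unless dpm already
 * did), enable the GART, configure the GPU, allocate the RLC and
 * writeback buffers, start the fence rings, UVD/VCE, interrupts and
 * the CP/DMA rings, and finally initialize the IB pool, VM manager
 * and audio.
 */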
6598 static int si_startup(struct radeon_device *rdev)
6599 {
6600 	struct radeon_ring *ring;
6601 	int r;
6602 
6603 	/* enable pcie gen2/3 link */
6604 	si_pcie_gen3_enable(rdev);
6605 	/* enable aspm */
6606 	si_program_aspm(rdev);
6607 
6608 	/* scratch needs to be initialized before MC */
6609 	r = r600_vram_scratch_init(rdev);
6610 	if (r)
6611 		return r;
6612 
6613 	si_mc_program(rdev);
6614 
6615 	if (!rdev->pm.dpm_enabled) {
6616 		r = si_mc_load_microcode(rdev);
6617 		if (r) {
6618 			DRM_ERROR("Failed to load MC firmware!\n");
6619 			return r;
6620 		}
6621 	}
6622 
6623 	r = si_pcie_gart_enable(rdev);
6624 	if (r)
6625 		return r;
6626 	si_gpu_init(rdev);
6627 
6628 	/* allocate rlc buffers */
6629 	if (rdev->family == CHIP_VERDE) {
6630 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6631 		rdev->rlc.reg_list_size =
6632 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6633 	}
6634 	rdev->rlc.cs_data = si_cs_data;
6635 	r = sumo_rlc_init(rdev);
6636 	if (r) {
6637 		DRM_ERROR("Failed to init rlc BOs!\n");
6638 		return r;
6639 	}
6640 
6641 	/* allocate wb buffer */
6642 	r = radeon_wb_init(rdev);
6643 	if (r)
6644 		return r;
6645 
6646 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6647 	if (r) {
6648 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6649 		return r;
6650 	}
6651 
6652 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6653 	if (r) {
6654 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6655 		return r;
6656 	}
6657 
6658 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6659 	if (r) {
6660 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6661 		return r;
6662 	}
6663 
6664 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6665 	if (r) {
6666 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6667 		return r;
6668 	}
6669 
6670 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6671 	if (r) {
6672 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6673 		return r;
6674 	}
6675 
6676 	si_uvd_start(rdev);
6677 	si_vce_start(rdev);
6678 
6679 	/* Enable IRQ */
6680 	if (!rdev->irq.installed) {
6681 		r = radeon_irq_kms_init(rdev);
6682 		if (r)
6683 			return r;
6684 	}
6685 
6686 	r = si_irq_init(rdev);
6687 	if (r) {
6688 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
6689 		radeon_irq_kms_fini(rdev);
6690 		return r;
6691 	}
6692 	si_irq_set(rdev);
6693 
6694 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6695 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6696 			     RADEON_CP_PACKET2);
6697 	if (r)
6698 		return r;
6699 
6700 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6701 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6702 			     RADEON_CP_PACKET2);
6703 	if (r)
6704 		return r;
6705 
6706 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6707 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6708 			     RADEON_CP_PACKET2);
6709 	if (r)
6710 		return r;
6711 
6712 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6713 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6714 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6715 	if (r)
6716 		return r;
6717 
6718 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6719 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6720 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6721 	if (r)
6722 		return r;
6723 
6724 	r = si_cp_load_microcode(rdev);
6725 	if (r)
6726 		return r;
6727 	r = si_cp_resume(rdev);
6728 	if (r)
6729 		return r;
6730 
6731 	r = cayman_dma_resume(rdev);
6732 	if (r)
6733 		return r;
6734 
6735 	si_uvd_resume(rdev);
6736 	si_vce_resume(rdev);
6737 
6738 	r = radeon_ib_pool_init(rdev);
6739 	if (r) {
6740 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6741 		return r;
6742 	}
6743 
6744 	r = radeon_vm_manager_init(rdev);
6745 	if (r) {
6746 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6747 		return r;
6748 	}
6749 
6750 	r = radeon_audio_init(rdev);
6751 	if (r)
6752 		return r;
6753 
6754 	return 0;
6755 }
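/*
 * Editor's note: the five fence/ring bring-up pairs in si_startup() all
 * follow the same pattern.  A minimal sketch of a helper that could factor
 * it out (hypothetical, not part of this driver; note that si_startup()
 * deliberately starts every fence ring before touching the IRQ and ring
 * hardware, so a direct 1:1 substitution would reorder operations):
 */
#if 0
static int si_startup_one_ring(struct radeon_device *rdev, int idx,
			       unsigned rptr_offs, u32 nop)
{
	struct radeon_ring *ring = &rdev->ring[idx];
	int r;

	r = radeon_fence_driver_start_ring(rdev, idx);
	if (r) {
		dev_err(rdev->dev, "failed initializing fences (%d).\n", r);
		return r;
	}
	return radeon_ring_init(rdev, ring, ring->ring_size, rptr_offs, nop);
}
#endif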
6756 
6757 int si_resume(struct radeon_device *rdev)
6758 {
6759 	int r;
6760 
6761 	/* Do not reset the GPU before posting; on rv770 hardware, unlike
6762 	 * r500, posting performs the tasks needed to bring the GPU back
6763 	 * into good shape.
6764 	 */
6765 	/* post card */
6766 	atom_asic_init(rdev->mode_info.atom_context);
6767 
6768 	/* init golden registers */
6769 	si_init_golden_registers(rdev);
6770 
6771 	if (rdev->pm.pm_method == PM_METHOD_DPM)
6772 		radeon_pm_resume(rdev);
6773 
6774 	rdev->accel_working = true;
6775 	r = si_startup(rdev);
6776 	if (r) {
6777 		DRM_ERROR("si startup failed on resume\n");
6778 		rdev->accel_working = false;
6779 		return r;
6780 	}
6781 
6782 	return r;
6783 
6784 }
6785 
6786 int si_suspend(struct radeon_device *rdev)
6787 {
6788 	radeon_pm_suspend(rdev);
6789 	radeon_audio_fini(rdev);
6790 	radeon_vm_manager_fini(rdev);
6791 	si_cp_enable(rdev, false);
6792 	cayman_dma_stop(rdev);
6793 	if (rdev->has_uvd) {
6794 		uvd_v1_0_fini(rdev);
6795 		radeon_uvd_suspend(rdev);
6796 	}
6797 	if (rdev->has_vce)
6798 		radeon_vce_suspend(rdev);
6799 	si_fini_pg(rdev);
6800 	si_fini_cg(rdev);
6801 	si_irq_suspend(rdev);
6802 	radeon_wb_disable(rdev);
6803 	si_pcie_gart_disable(rdev);
6804 	return 0;
6805 }
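/*
 * For reference: si_suspend()/si_resume() above are reached through the SI
 * entry of the asic callback table rather than called directly.  A sketch
 * of the relevant fields (the actual table lives in radeon_asic.c; the
 * exact initializer shown here is an assumption):
 */
#if 0
static struct radeon_asic si_asic = {
	.init = &si_init,
	.fini = &si_fini,
	.suspend = &si_suspend,
	.resume = &si_resume,
	/* ... */
};
#endif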
6806 
6807 /* The plan is to move initialization into this function and use
6808  * helper functions so that radeon_device_init does little more
6809  * than call asic-specific functions.  This should also allow
6810  * us to remove a bunch of callback functions,
6811  * like vram_info.
6812  */
6813 int si_init(struct radeon_device *rdev)
6814 {
6815 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6816 	int r;
6817 
6818 	/* Read BIOS */
6819 	if (!radeon_get_bios(rdev)) {
6820 		if (ASIC_IS_AVIVO(rdev))
6821 			return -EINVAL;
6822 	}
6823 	/* Must be an ATOMBIOS */
6824 	if (!rdev->is_atom_bios) {
6825 		dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
6826 		return -EINVAL;
6827 	}
6828 	r = radeon_atombios_init(rdev);
6829 	if (r)
6830 		return r;
6831 
6832 	/* Post card if necessary */
6833 	if (!radeon_card_posted(rdev)) {
6834 		if (!rdev->bios) {
6835 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6836 			return -EINVAL;
6837 		}
6838 		DRM_INFO("GPU not posted. posting now...\n");
6839 		atom_asic_init(rdev->mode_info.atom_context);
6840 	}
6841 	/* init golden registers */
6842 	si_init_golden_registers(rdev);
6843 	/* Initialize scratch registers */
6844 	si_scratch_init(rdev);
6845 	/* Initialize surface registers */
6846 	radeon_surface_init(rdev);
6847 	/* Initialize clocks */
6848 	radeon_get_clock_info(rdev->ddev);
6849 
6850 	/* Fence driver */
6851 	r = radeon_fence_driver_init(rdev);
6852 	if (r)
6853 		return r;
6854 
6855 	/* initialize memory controller */
6856 	r = si_mc_init(rdev);
6857 	if (r)
6858 		return r;
6859 	/* Memory manager */
6860 	r = radeon_bo_init(rdev);
6861 	if (r)
6862 		return r;
6863 
6864 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6865 	    !rdev->rlc_fw || !rdev->mc_fw) {
6866 		r = si_init_microcode(rdev);
6867 		if (r) {
6868 			DRM_ERROR("Failed to load firmware!\n");
6869 			return r;
6870 		}
6871 	}
6872 
6873 	/* Initialize power management */
6874 	radeon_pm_init(rdev);
6875 
6876 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6877 	ring->ring_obj = NULL;
6878 	r600_ring_init(rdev, ring, 1024 * 1024);
6879 
6880 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6881 	ring->ring_obj = NULL;
6882 	r600_ring_init(rdev, ring, 1024 * 1024);
6883 
6884 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6885 	ring->ring_obj = NULL;
6886 	r600_ring_init(rdev, ring, 1024 * 1024);
6887 
6888 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6889 	ring->ring_obj = NULL;
6890 	r600_ring_init(rdev, ring, 64 * 1024);
6891 
6892 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6893 	ring->ring_obj = NULL;
6894 	r600_ring_init(rdev, ring, 64 * 1024);
6895 
6896 	si_uvd_init(rdev);
6897 	si_vce_init(rdev);
6898 
6899 	rdev->ih.ring_obj = NULL;
6900 	r600_ih_ring_init(rdev, 64 * 1024);
6901 
6902 	r = r600_pcie_gart_init(rdev);
6903 	if (r)
6904 		return r;
6905 
6906 #ifdef __DragonFly__
6907 	/*
6908 	 * Some GLX operations (xfce 4.14) hang on SI hardware;
6909 	 * tell userland that acceleration is not working properly.
6910 	 */
6911 	rdev->accel_working = false;
6912 	DRM_ERROR("GPU acceleration disabled for now on DragonFly\n");
6913 #else
6914 	rdev->accel_working = true;
6915 #endif
6916 	r = si_startup(rdev);
6917 	if (r) {
6918 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6919 		si_cp_fini(rdev);
6920 		cayman_dma_fini(rdev);
6921 		si_irq_fini(rdev);
6922 		sumo_rlc_fini(rdev);
6923 		radeon_wb_fini(rdev);
6924 		radeon_ib_pool_fini(rdev);
6925 		radeon_vm_manager_fini(rdev);
6926 		radeon_irq_kms_fini(rdev);
6927 		si_pcie_gart_fini(rdev);
6928 		rdev->accel_working = false;
6929 	}
6930 
6931 	/* Don't start up if the MC ucode is missing.
6932 	 * The default clocks and voltages before the MC ucode
6933 	 * is loaded are not sufficient for advanced operations.
6934 	 */
6935 	if (!rdev->mc_fw) {
6936 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
6937 		return -EINVAL;
6938 	}
6939 
6940 	return 0;
6941 }
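/*
 * Ring sizes chosen in si_init() above, for quick reference: the GFX ring
 * and both compute rings (CP1/CP2) get 1 MiB each, while the two DMA rings
 * and the IH ring get 64 KiB each.
 */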
6942 
6943 void si_fini(struct radeon_device *rdev)
6944 {
6945 	radeon_pm_fini(rdev);
6946 	si_cp_fini(rdev);
6947 	cayman_dma_fini(rdev);
6948 	si_fini_pg(rdev);
6949 	si_fini_cg(rdev);
6950 	si_irq_fini(rdev);
6951 	sumo_rlc_fini(rdev);
6952 	radeon_wb_fini(rdev);
6953 	radeon_vm_manager_fini(rdev);
6954 	radeon_ib_pool_fini(rdev);
6955 	radeon_irq_kms_fini(rdev);
6956 	if (rdev->has_uvd) {
6957 		uvd_v1_0_fini(rdev);
6958 		radeon_uvd_fini(rdev);
6959 	}
6960 	if (rdev->has_vce)
6961 		radeon_vce_fini(rdev);
6962 	si_pcie_gart_fini(rdev);
6963 	r600_vram_scratch_fini(rdev);
6964 	radeon_gem_fini(rdev);
6965 	radeon_fence_driver_fini(rdev);
6966 	radeon_bo_fini(rdev);
6967 	radeon_atombios_fini(rdev);
6968 	kfree(rdev->bios);
6969 	rdev->bios = NULL;
6970 }
6971 
6972 /**
6973  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6974  *
6975  * @rdev: radeon_device pointer
6976  *
6977  * Fetches a GPU clock counter snapshot (SI).
6978  * Returns the 64 bit clock counter snapshot.
6979  */
6980 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6981 {
6982 	uint64_t clock;
6983 
6984 	mutex_lock(&rdev->gpu_clock_mutex);
6985 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6986 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6987 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6988 	mutex_unlock(&rdev->gpu_clock_mutex);
6989 	return clock;
6990 }
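/*
 * Illustrative use only (not driver code): a caller can sample the counter
 * twice around a known delay to estimate the GPU clock frequency.  The
 * 10 ms delay below is an arbitrary example value:
 */
#if 0
	uint64_t t0, t1, hz;

	t0 = si_get_gpu_clock_counter(rdev);
	mdelay(10);
	t1 = si_get_gpu_clock_counter(rdev);
	/* counter ticks per second, assuming the delay was exact */
	hz = (t1 - t0) * 100;
#endif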
6991 
6992 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6993 {
6994 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
6995 	int r;
6996 
6997 	/* bypass vclk and dclk with bclk */
6998 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
6999 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7000 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7001 
7002 	/* put PLL in bypass mode */
7003 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7004 
7005 	if (!vclk || !dclk) {
7006 		/* keep the Bypass mode */
7007 		return 0;
7008 	}
7009 
7010 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7011 					  16384, 0x03FFFFFF, 0, 128, 5,
7012 					  &fb_div, &vclk_div, &dclk_div);
7013 	if (r)
7014 		return r;
7015 
7016 	/* set RESET_ANTI_MUX to 0 */
7017 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7018 
7019 	/* set VCO_MODE to 1 */
7020 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7021 
7022 	/* disable sleep mode */
7023 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7024 
7025 	/* deassert UPLL_RESET */
7026 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7027 
7028 	mdelay(1);
7029 
7030 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7031 	if (r)
7032 		return r;
7033 
7034 	/* assert UPLL_RESET again */
7035 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7036 
7037 	/* disable spread spectrum. */
7038 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7039 
7040 	/* set feedback divider */
7041 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7042 
7043 	/* set ref divider to 0 */
7044 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7045 
7046 	if (fb_div < 307200)
7047 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7048 	else
7049 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7050 
7051 	/* set PDIV_A and PDIV_B */
7052 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7053 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7054 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7055 
7056 	/* give the PLL some time to settle */
7057 	mdelay(15);
7058 
7059 	/* deassert PLL_RESET */
7060 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7061 
7062 	mdelay(15);
7063 
7064 	/* switch from bypass mode to normal mode */
7065 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7066 
7067 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7068 	if (r)
7069 		return r;
7070 
7071 	/* switch VCLK and DCLK selection */
7072 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7073 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7074 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7075 
7076 	mdelay(100);
7077 
7078 	return 0;
7079 }
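/*
 * si_set_uvd_clocks() above follows the standard UPLL reprogramming
 * recipe: bypass the PLL, compute the dividers, program them with the PLL
 * held in reset, let the VCO settle, release reset, handshake with the PLL
 * via radeon_uvd_send_upll_ctlreq(), and only then route VCLK/DCLK back to
 * the PLL output.  si_set_vce_clocks() further below repeats the same
 * recipe for the VCE PLL.
 */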
7080 
7081 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7082 {
7083 	struct pci_dev *root = rdev->pdev->bus->self;
7084 	int bridge_pos, gpu_pos;
7085 	u32 speed_cntl, mask, current_data_rate;
7086 	int ret, i;
7087 	u16 tmp16;
7088 
7089 #if 0
7090 	if (pci_is_root_bus(rdev->pdev->bus))
7091 		return;
7092 #endif
7093 
7094 	if (radeon_pcie_gen2 == 0)
7095 		return;
7096 
7097 	if (rdev->flags & RADEON_IS_IGP)
7098 		return;
7099 
7100 	if (!(rdev->flags & RADEON_IS_PCIE))
7101 		return;
7102 
7103 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7104 	if (ret != 0)
7105 		return;
7106 
7107 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7108 		return;
7109 
7110 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7111 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7112 		LC_CURRENT_DATA_RATE_SHIFT;
7113 	if (mask & DRM_PCIE_SPEED_80) {
7114 		if (current_data_rate == 2) {
7115 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7116 			return;
7117 		}
7118 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7119 	} else if (mask & DRM_PCIE_SPEED_50) {
7120 		if (current_data_rate == 1) {
7121 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7122 			return;
7123 		}
7124 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7125 	}
7126 
7127 	bridge_pos = pci_pcie_cap(root);
7128 	if (!bridge_pos)
7129 		return;
7130 
7131 	gpu_pos = pci_pcie_cap(rdev->pdev);
7132 	if (!gpu_pos)
7133 		return;
7134 
7135 	if (mask & DRM_PCIE_SPEED_80) {
7136 		/* re-try equalization if gen3 is not already enabled */
7137 		if (current_data_rate != 2) {
7138 			u16 bridge_cfg, gpu_cfg;
7139 			u16 bridge_cfg2, gpu_cfg2;
7140 			u32 max_lw, current_lw, tmp;
7141 
7142 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7143 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7144 
7145 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7146 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7147 
7148 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7149 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7150 
7151 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7152 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7153 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7154 
7155 			if (current_lw < max_lw) {
7156 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7157 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7158 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7159 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7160 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7161 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7162 				}
7163 			}
7164 
7165 			for (i = 0; i < 10; i++) {
7166 				/* check status */
7167 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7168 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7169 					break;
7170 
7171 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7172 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7173 
7174 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7175 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7176 
7177 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7178 				tmp |= LC_SET_QUIESCE;
7179 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7180 
7181 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7182 				tmp |= LC_REDO_EQ;
7183 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7184 
7185 				mdelay(100);
7186 
7187 				/* linkctl */
7188 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7189 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7190 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7191 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7192 
7193 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7194 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7195 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7196 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7197 
7198 				/* linkctl2 */
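				/*
				 * Per the PCIe spec layout of Link Control 2:
				 * bit 4 is Enter Compliance, and bits 9-11
				 * cover the top Transmit Margin bit, Enter
				 * Modified Compliance and Compliance SOS.
				 * The masks below restore those bits from the
				 * values captured before retraining.
				 */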
7199 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7200 				tmp16 &= ~((1 << 4) | (7 << 9));
7201 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7202 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7203 
7204 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7205 				tmp16 &= ~((1 << 4) | (7 << 9));
7206 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7207 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7208 
7209 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7210 				tmp &= ~LC_SET_QUIESCE;
7211 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7212 			}
7213 		}
7214 	}
7215 
7216 	/* set the link speed */
7217 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7218 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7219 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7220 
7221 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7222 	tmp16 &= ~0xf;
7223 	if (mask & DRM_PCIE_SPEED_80)
7224 		tmp16 |= 3; /* gen3 */
7225 	else if (mask & DRM_PCIE_SPEED_50)
7226 		tmp16 |= 2; /* gen2 */
7227 	else
7228 		tmp16 |= 1; /* gen1 */
7229 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7230 
7231 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7232 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7233 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7234 
7235 	for (i = 0; i < rdev->usec_timeout; i++) {
7236 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7237 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7238 			break;
7239 		udelay(1);
7240 	}
7241 }
7242 
7243 static void si_program_aspm(struct radeon_device *rdev)
7244 {
7245 	u32 data, orig;
7246 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7247 #if 0
7248 	bool disable_clkreq = false;
7249 #endif
7250 
7251 	if (radeon_aspm == 0)
7252 		return;
7253 
7254 	if (!(rdev->flags & RADEON_IS_PCIE))
7255 		return;
7256 
7257 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7258 	data &= ~LC_XMIT_N_FTS_MASK;
7259 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7260 	if (orig != data)
7261 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7262 
7263 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7264 	data |= LC_GO_TO_RECOVERY;
7265 	if (orig != data)
7266 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7267 
7268 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7269 	data |= P_IGNORE_EDB_ERR;
7270 	if (orig != data)
7271 		WREG32_PCIE(PCIE_P_CNTL, data);
7272 
7273 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7274 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7275 	data |= LC_PMI_TO_L1_DIS;
7276 	if (!disable_l0s)
7277 		data |= LC_L0S_INACTIVITY(7);
7278 
7279 	if (!disable_l1) {
7280 		data |= LC_L1_INACTIVITY(7);
7281 		data &= ~LC_PMI_TO_L1_DIS;
7282 		if (orig != data)
7283 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7284 
7285 		if (!disable_plloff_in_l1) {
7286 			bool clk_req_support;
7287 
7288 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7289 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7290 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7291 			if (orig != data)
7292 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7293 
7294 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7295 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7296 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7297 			if (orig != data)
7298 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7299 
7300 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7301 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7302 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7303 			if (orig != data)
7304 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7305 
7306 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7307 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7308 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7309 			if (orig != data)
7310 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7311 
7312 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7313 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7314 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7315 				if (orig != data)
7316 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7317 
7318 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7319 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7320 				if (orig != data)
7321 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7322 
7323 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7324 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7325 				if (orig != data)
7326 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7327 
7328 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7329 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7330 				if (orig != data)
7331 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7332 
7333 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7334 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7335 				if (orig != data)
7336 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7337 
7338 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7339 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7340 				if (orig != data)
7341 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7342 
7343 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7344 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7345 				if (orig != data)
7346 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7347 
7348 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7349 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7350 				if (orig != data)
7351 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7352 			}
7353 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7354 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7355 			data |= LC_DYN_LANES_PWR_STATE(3);
7356 			if (orig != data)
7357 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7358 
7359 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7360 			data &= ~LS2_EXIT_TIME_MASK;
7361 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7362 				data |= LS2_EXIT_TIME(5);
7363 			if (orig != data)
7364 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7365 
7366 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7367 			data &= ~LS2_EXIT_TIME_MASK;
7368 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7369 				data |= LS2_EXIT_TIME(5);
7370 			if (orig != data)
7371 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7372 
7373 #ifdef zMN_TODO
7374 			if (!disable_clkreq &&
7375 			    !pci_is_root_bus(rdev->pdev->bus)) {
7376 				struct pci_dev *root = rdev->pdev->bus->self;
7377 				u32 lnkcap;
7378 
7379 				clk_req_support = false;
7380 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7381 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7382 					clk_req_support = true;
7383 			} else {
7384 				clk_req_support = false;
7385 			}
7386 #else
7387 			clk_req_support = false;
7388 #endif
7389 
7390 			if (clk_req_support) {
7391 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7392 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7393 				if (orig != data)
7394 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7395 
7396 				orig = data = RREG32(THM_CLK_CNTL);
7397 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7398 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7399 				if (orig != data)
7400 					WREG32(THM_CLK_CNTL, data);
7401 
7402 				orig = data = RREG32(MISC_CLK_CNTL);
7403 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7404 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7405 				if (orig != data)
7406 					WREG32(MISC_CLK_CNTL, data);
7407 
7408 				orig = data = RREG32(CG_CLKPIN_CNTL);
7409 				data &= ~BCLK_AS_XCLK;
7410 				if (orig != data)
7411 					WREG32(CG_CLKPIN_CNTL, data);
7412 
7413 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7414 				data &= ~FORCE_BIF_REFCLK_EN;
7415 				if (orig != data)
7416 					WREG32(CG_CLKPIN_CNTL_2, data);
7417 
7418 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7419 				data &= ~MPLL_CLKOUT_SEL_MASK;
7420 				data |= MPLL_CLKOUT_SEL(4);
7421 				if (orig != data)
7422 					WREG32(MPLL_BYPASSCLK_SEL, data);
7423 
7424 				orig = data = RREG32(SPLL_CNTL_MODE);
7425 				data &= ~SPLL_REFCLK_SEL_MASK;
7426 				if (orig != data)
7427 					WREG32(SPLL_CNTL_MODE, data);
7428 			}
7429 		}
7430 	} else {
7431 		if (orig != data)
7432 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7433 	}
7434 
7435 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7436 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7437 	if (orig != data)
7438 		WREG32_PCIE(PCIE_CNTL2, data);
7439 
7440 	if (!disable_l0s) {
7441 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7442 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7443 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7444 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7445 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7446 				data &= ~LC_L0S_INACTIVITY_MASK;
7447 				if (orig != data)
7448 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7449 			}
7450 		}
7451 	}
7452 }
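/*
 * The "orig = data = RREG32_*(); ...; if (orig != data) WREG32_*()"
 * sequence repeated throughout si_program_aspm() is a write-if-changed
 * read-modify-write.  A hypothetical helper making the idiom explicit
 * (illustrative only, not part of this driver):
 */
#if 0
static void si_pcie_port_rmw(struct radeon_device *rdev, u32 reg,
			     u32 clear, u32 set)
{
	u32 orig, data;

	orig = data = RREG32_PCIE_PORT(reg);
	data &= ~clear;
	data |= set;
	if (orig != data)
		WREG32_PCIE_PORT(reg, data);
}
#endif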
7453 
7454 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7455 {
7456 	unsigned i;
7457 
7458 	/* make sure VCEPLL_CTLREQ is deasserted */
7459 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7460 
7461 	mdelay(10);
7462 
7463 	/* assert UPLL_CTLREQ */
7464 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7465 
7466 	/* wait for CTLACK and CTLACK2 to get asserted */
7467 	for (i = 0; i < 100; ++i) {
7468 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7469 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7470 			break;
7471 		mdelay(10);
7472 	}
7473 
7474 	/* deassert UPLL_CTLREQ */
7475 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7476 
7477 	if (i == 100) {
7478 		DRM_ERROR("Timeout setting VCE clocks!\n");
7479 		return -ETIMEDOUT;
7480 	}
7481 
7482 	return 0;
7483 }
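/*
 * Note: the ack poll in si_vce_send_vcepll_ctlreq() above allows up to 100
 * iterations of mdelay(10), i.e. roughly one second, before giving up with
 * -ETIMEDOUT.
 */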
7484 
7485 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7486 {
7487 	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7488 	int r;
7489 
7490 	/* bypass evclk and ecclk with bclk */
7491 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7492 		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7493 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7494 
7495 	/* put PLL in bypass mode */
7496 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7497 		     ~VCEPLL_BYPASS_EN_MASK);
7498 
7499 	if (!evclk || !ecclk) {
7500 		/* keep the Bypass mode, put PLL to sleep */
7501 		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7502 			     ~VCEPLL_SLEEP_MASK);
7503 		return 0;
7504 	}
7505 
7506 	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7507 					  16384, 0x03FFFFFF, 0, 128, 5,
7508 					  &fb_div, &evclk_div, &ecclk_div);
7509 	if (r)
7510 		return r;
7511 
7512 	/* set RESET_ANTI_MUX to 0 */
7513 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7514 
7515 	/* set VCO_MODE to 1 */
7516 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7517 		     ~VCEPLL_VCO_MODE_MASK);
7518 
7519 	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7520 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7521 		     ~VCEPLL_SLEEP_MASK);
7522 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7523 
7524 	/* deassert VCEPLL_RESET */
7525 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7526 
7527 	mdelay(1);
7528 
7529 	r = si_vce_send_vcepll_ctlreq(rdev);
7530 	if (r)
7531 		return r;
7532 
7533 	/* assert VCEPLL_RESET again */
7534 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7535 
7536 	/* disable spread spectrum. */
7537 	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7538 
7539 	/* set feedback divider */
7540 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7541 
7542 	/* set ref divider to 0 */
7543 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7544 
7545 	/* set PDIV_A and PDIV_B */
7546 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7547 		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7548 		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7549 
7550 	/* give the PLL some time to settle */
7551 	mdelay(15);
7552 
7553 	/* deassert PLL_RESET */
7554 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7555 
7556 	mdelay(15);
7557 
7558 	/* switch from bypass mode to normal mode */
7559 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7560 
7561 	r = si_vce_send_vcepll_ctlreq(rdev);
7562 	if (r)
7563 		return r;
7564 
7565 	/* switch EVCLK and ECCLK selection */
7566 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7567 		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7568 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7569 
7570 	mdelay(100);
7571 
7572 	return 0;
7573 }
7574