xref: /dragonfly/sys/dev/drm/radeon/si.c (revision 9317c2d0)
/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include <drm/radeon_drm.h>
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"
#include "clearstate_si.h"
#include "radeon_ucode.h"

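/*
 * The all-caps firmware names are the legacy raw images; the lowercase
 * names are the newer images with radeon_ucode headers, which
 * si_init_microcode() below prefers and validates, falling back to the
 * legacy name when the new one is absent.  The *_k_smc and banks_k_2_smc
 * variants serve specific device/revision IDs, and si58_mc.bin serves
 * boards whose MC_SEQ_MISC0 memory configuration reads 0x58.
 */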
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");

MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
MODULE_FIRMWARE("radeon/tahiti_me.bin");
MODULE_FIRMWARE("radeon/tahiti_ce.bin");
MODULE_FIRMWARE("radeon/tahiti_mc.bin");
MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
MODULE_FIRMWARE("radeon/tahiti_smc.bin");

MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");

MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
MODULE_FIRMWARE("radeon/pitcairn_me.bin");
MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");

MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");

MODULE_FIRMWARE("radeon/verde_pfp.bin");
MODULE_FIRMWARE("radeon/verde_me.bin");
MODULE_FIRMWARE("radeon/verde_ce.bin");
MODULE_FIRMWARE("radeon/verde_mc.bin");
MODULE_FIRMWARE("radeon/verde_rlc.bin");
MODULE_FIRMWARE("radeon/verde_smc.bin");
MODULE_FIRMWARE("radeon/verde_k_smc.bin");

MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");

MODULE_FIRMWARE("radeon/oland_pfp.bin");
MODULE_FIRMWARE("radeon/oland_me.bin");
MODULE_FIRMWARE("radeon/oland_ce.bin");
MODULE_FIRMWARE("radeon/oland_mc.bin");
MODULE_FIRMWARE("radeon/oland_rlc.bin");
MODULE_FIRMWARE("radeon/oland_smc.bin");
MODULE_FIRMWARE("radeon/oland_k_smc.bin");

MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

MODULE_FIRMWARE("radeon/hainan_pfp.bin");
MODULE_FIRMWARE("radeon/hainan_me.bin");
MODULE_FIRMWARE("radeon/hainan_ce.bin");
MODULE_FIRMWARE("radeon/hainan_mc.bin");
MODULE_FIRMWARE("radeon/hainan_rlc.bin");
MODULE_FIRMWARE("radeon/hainan_smc.bin");
MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");

MODULE_FIRMWARE("radeon/si58_mc.bin");

static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);

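/*
 * RLC save/restore list, handed to the RLC power-gating setup code.
 * Each pair appears to encode a broadcast/engine select in the upper
 * 16 bits and a register dword offset (byte offset >> 2) in the lower
 * 16 bits, followed by a placeholder dword for the saved value.
 */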
static const u32 verde_rlc_save_restore_register_list[] =
{
	(0x8000 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x98f0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xe7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9150 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x897c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8d8c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac54 >> 2),
	0x00000000,
	0x3,
	(0x9c00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9910 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9914 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9918 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x991c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9920 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9924 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9928 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x992c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9930 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9934 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9938 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x993c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9940 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9944 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9948 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x994c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9950 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9954 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9958 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x995c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9960 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9964 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9968 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x996c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9970 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9974 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9978 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x997c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9980 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9984 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9988 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x998c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c08 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9060 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9364 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x913c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e50 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e58 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e5c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9508 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x950c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9494 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88cc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x89b0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9830 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9838 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9a10 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9874 >> 2),
	0x00000000,
	0x00000000
};

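/*
 * "Golden" register tables are {offset, and_mask, or_value} triplets
 * consumed by radeon_program_register_sequence(), which applies roughly:
 *
 *	tmp = (and_mask == 0xffffffff) ? or_value
 *	    : (RREG32(offset) & ~and_mask) | or_value;
 *	WREG32(offset, tmp);
 *
 * i.e. and_mask selects the bits the table is allowed to change.
 */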
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};

static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};

static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};

static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};

static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};

static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};

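/*
 * Per-ASIC medium- and coarse-grain clock gating (MGCG/CGCG) defaults,
 * encoded as the same {offset, and_mask, or_value} triplets as the
 * golden register tables above.
 */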
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 verde_pg_init[] =
{
	0x353c, 0xffffffff, 0x40000,
	0x3538, 0xffffffff, 0x200010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x7007,
	0x3538, 0xffffffff, 0x300010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x400000,
	0x3538, 0xffffffff, 0x100010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x120200,
	0x3538, 0xffffffff, 0x500010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x1e1e16,
	0x3538, 0xffffffff, 0x600010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x171f1e,
	0x3538, 0xffffffff, 0x700010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x3538, 0xffffffff, 0x9ff,
	0x3500, 0xffffffff, 0x0,
	0x3504, 0xffffffff, 0x10000800,
	0x3504, 0xffffffff, 0xf,
	0x3504, 0xffffffff, 0xf,
	0x3500, 0xffffffff, 0x4,
	0x3504, 0xffffffff, 0x1000051e,
	0x3504, 0xffffffff, 0xffff,
	0x3504, 0xffffffff, 0xffff,
	0x3500, 0xffffffff, 0x8,
	0x3504, 0xffffffff, 0x80500,
	0x3500, 0xffffffff, 0x12,
	0x3504, 0xffffffff, 0x9050c,
	0x3500, 0xffffffff, 0x1d,
	0x3504, 0xffffffff, 0xb052c,
	0x3500, 0xffffffff, 0x2a,
	0x3504, 0xffffffff, 0x1053e,
	0x3500, 0xffffffff, 0x2d,
	0x3504, 0xffffffff, 0x10546,
	0x3500, 0xffffffff, 0x30,
	0x3504, 0xffffffff, 0xa054e,
	0x3500, 0xffffffff, 0x3c,
	0x3504, 0xffffffff, 0x1055f,
	0x3500, 0xffffffff, 0x3f,
	0x3504, 0xffffffff, 0x10567,
	0x3500, 0xffffffff, 0x42,
	0x3504, 0xffffffff, 0x1056f,
	0x3500, 0xffffffff, 0x45,
	0x3504, 0xffffffff, 0x10572,
	0x3500, 0xffffffff, 0x48,
	0x3504, 0xffffffff, 0x20575,
	0x3500, 0xffffffff, 0x4c,
	0x3504, 0xffffffff, 0x190801,
	0x3500, 0xffffffff, 0x67,
	0x3504, 0xffffffff, 0x1082a,
	0x3500, 0xffffffff, 0x6a,
	0x3504, 0xffffffff, 0x1b082d,
	0x3500, 0xffffffff, 0x87,
	0x3504, 0xffffffff, 0x310851,
	0x3500, 0xffffffff, 0xba,
	0x3504, 0xffffffff, 0x891,
	0x3500, 0xffffffff, 0xbc,
	0x3504, 0xffffffff, 0x893,
	0x3500, 0xffffffff, 0xbe,
	0x3504, 0xffffffff, 0x20895,
	0x3500, 0xffffffff, 0xc2,
	0x3504, 0xffffffff, 0x20899,
	0x3500, 0xffffffff, 0xc6,
	0x3504, 0xffffffff, 0x2089d,
	0x3500, 0xffffffff, 0xca,
	0x3504, 0xffffffff, 0x8a1,
	0x3500, 0xffffffff, 0xcc,
	0x3504, 0xffffffff, 0x8a3,
	0x3500, 0xffffffff, 0xce,
	0x3504, 0xffffffff, 0x308a5,
	0x3500, 0xffffffff, 0xd3,
	0x3504, 0xffffffff, 0x6d08cd,
	0x3500, 0xffffffff, 0x142,
	0x3504, 0xffffffff, 0x2000095a,
	0x3504, 0xffffffff, 0x1,
	0x3500, 0xffffffff, 0x144,
	0x3504, 0xffffffff, 0x301f095b,
	0x3500, 0xffffffff, 0x165,
	0x3504, 0xffffffff, 0xc094d,
	0x3500, 0xffffffff, 0x173,
	0x3504, 0xffffffff, 0xf096d,
	0x3500, 0xffffffff, 0x184,
	0x3504, 0xffffffff, 0x15097f,
	0x3500, 0xffffffff, 0x19b,
	0x3504, 0xffffffff, 0xc0998,
	0x3500, 0xffffffff, 0x1a9,
	0x3504, 0xffffffff, 0x409a7,
	0x3500, 0xffffffff, 0x1af,
	0x3504, 0xffffffff, 0xcdc,
	0x3500, 0xffffffff, 0x1b1,
	0x3504, 0xffffffff, 0x800,
	0x3508, 0xffffffff, 0x6c9b2000,
	0x3510, 0xfc00, 0x2000,
	0x3544, 0xffffffff, 0xfc0,
	0x28d4, 0x00000100, 0x100
};

static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}

/**
 * si_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register.
 */
int si_get_allowed_info_register(struct radeon_device *rdev,
				 u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
	case UVD_STATUS:
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

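/*
 * Like the rest of the driver's clock bookkeeping, these are in units
 * of 10 kHz: PCIE_BUS_CLK is the 100 MHz PCIe reference and TCLK the
 * 10 MHz clock divided down from it.
 */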
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)

/**
 * si_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine (SI).
 */
u32 si_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;
	u32 tmp;

	tmp = RREG32(CG_CLKPIN_CNTL_2);
	if (tmp & MUX_TCLK_TO_XCLK)
		return TCLK;

	tmp = RREG32(CG_CLKPIN_CNTL);
	if (tmp & XTALIN_DIVIDE)
		return reference_clock / 4;

	return reference_clock;
}

/* get temperature in millidegrees */
int si_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

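	/* bit 9 flags an out-of-range reading; report the 255 C maximum */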
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = (actual_temp * 1000);

	return actual_temp;
}

#define TAHITI_IO_MC_REGS_SIZE 36

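/*
 * Side-band data for the legacy MC firmware path: {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pairs that si_mc_load_microcode() streams to the
 * hardware before writing the ucode itself.
 */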
1341 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1342 	{0x0000006f, 0x03044000},
1343 	{0x00000070, 0x0480c018},
1344 	{0x00000071, 0x00000040},
1345 	{0x00000072, 0x01000000},
1346 	{0x00000074, 0x000000ff},
1347 	{0x00000075, 0x00143400},
1348 	{0x00000076, 0x08ec0800},
1349 	{0x00000077, 0x040000cc},
1350 	{0x00000079, 0x00000000},
1351 	{0x0000007a, 0x21000409},
1352 	{0x0000007c, 0x00000000},
1353 	{0x0000007d, 0xe8000000},
1354 	{0x0000007e, 0x044408a8},
1355 	{0x0000007f, 0x00000003},
1356 	{0x00000080, 0x00000000},
1357 	{0x00000081, 0x01000000},
1358 	{0x00000082, 0x02000000},
1359 	{0x00000083, 0x00000000},
1360 	{0x00000084, 0xe3f3e4f4},
1361 	{0x00000085, 0x00052024},
1362 	{0x00000087, 0x00000000},
1363 	{0x00000088, 0x66036603},
1364 	{0x00000089, 0x01000000},
1365 	{0x0000008b, 0x1c0a0000},
1366 	{0x0000008c, 0xff010000},
1367 	{0x0000008e, 0xffffefff},
1368 	{0x0000008f, 0xfff3efff},
1369 	{0x00000090, 0xfff3efbf},
1370 	{0x00000094, 0x00101101},
1371 	{0x00000095, 0x00000fff},
1372 	{0x00000096, 0x00116fff},
1373 	{0x00000097, 0x60010000},
1374 	{0x00000098, 0x10010000},
1375 	{0x00000099, 0x00006000},
1376 	{0x0000009a, 0x00001000},
1377 	{0x0000009f, 0x00a77400}
1378 };
1379 
1380 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1381 	{0x0000006f, 0x03044000},
1382 	{0x00000070, 0x0480c018},
1383 	{0x00000071, 0x00000040},
1384 	{0x00000072, 0x01000000},
1385 	{0x00000074, 0x000000ff},
1386 	{0x00000075, 0x00143400},
1387 	{0x00000076, 0x08ec0800},
1388 	{0x00000077, 0x040000cc},
1389 	{0x00000079, 0x00000000},
1390 	{0x0000007a, 0x21000409},
1391 	{0x0000007c, 0x00000000},
1392 	{0x0000007d, 0xe8000000},
1393 	{0x0000007e, 0x044408a8},
1394 	{0x0000007f, 0x00000003},
1395 	{0x00000080, 0x00000000},
1396 	{0x00000081, 0x01000000},
1397 	{0x00000082, 0x02000000},
1398 	{0x00000083, 0x00000000},
1399 	{0x00000084, 0xe3f3e4f4},
1400 	{0x00000085, 0x00052024},
1401 	{0x00000087, 0x00000000},
1402 	{0x00000088, 0x66036603},
1403 	{0x00000089, 0x01000000},
1404 	{0x0000008b, 0x1c0a0000},
1405 	{0x0000008c, 0xff010000},
1406 	{0x0000008e, 0xffffefff},
1407 	{0x0000008f, 0xfff3efff},
1408 	{0x00000090, 0xfff3efbf},
1409 	{0x00000094, 0x00101101},
1410 	{0x00000095, 0x00000fff},
1411 	{0x00000096, 0x00116fff},
1412 	{0x00000097, 0x60010000},
1413 	{0x00000098, 0x10010000},
1414 	{0x00000099, 0x00006000},
1415 	{0x0000009a, 0x00001000},
1416 	{0x0000009f, 0x00a47400}
1417 };
1418 
1419 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1420 	{0x0000006f, 0x03044000},
1421 	{0x00000070, 0x0480c018},
1422 	{0x00000071, 0x00000040},
1423 	{0x00000072, 0x01000000},
1424 	{0x00000074, 0x000000ff},
1425 	{0x00000075, 0x00143400},
1426 	{0x00000076, 0x08ec0800},
1427 	{0x00000077, 0x040000cc},
1428 	{0x00000079, 0x00000000},
1429 	{0x0000007a, 0x21000409},
1430 	{0x0000007c, 0x00000000},
1431 	{0x0000007d, 0xe8000000},
1432 	{0x0000007e, 0x044408a8},
1433 	{0x0000007f, 0x00000003},
1434 	{0x00000080, 0x00000000},
1435 	{0x00000081, 0x01000000},
1436 	{0x00000082, 0x02000000},
1437 	{0x00000083, 0x00000000},
1438 	{0x00000084, 0xe3f3e4f4},
1439 	{0x00000085, 0x00052024},
1440 	{0x00000087, 0x00000000},
1441 	{0x00000088, 0x66036603},
1442 	{0x00000089, 0x01000000},
1443 	{0x0000008b, 0x1c0a0000},
1444 	{0x0000008c, 0xff010000},
1445 	{0x0000008e, 0xffffefff},
1446 	{0x0000008f, 0xfff3efff},
1447 	{0x00000090, 0xfff3efbf},
1448 	{0x00000094, 0x00101101},
1449 	{0x00000095, 0x00000fff},
1450 	{0x00000096, 0x00116fff},
1451 	{0x00000097, 0x60010000},
1452 	{0x00000098, 0x10010000},
1453 	{0x00000099, 0x00006000},
1454 	{0x0000009a, 0x00001000},
1455 	{0x0000009f, 0x00a37400}
1456 };
1457 
1458 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1459 	{0x0000006f, 0x03044000},
1460 	{0x00000070, 0x0480c018},
1461 	{0x00000071, 0x00000040},
1462 	{0x00000072, 0x01000000},
1463 	{0x00000074, 0x000000ff},
1464 	{0x00000075, 0x00143400},
1465 	{0x00000076, 0x08ec0800},
1466 	{0x00000077, 0x040000cc},
1467 	{0x00000079, 0x00000000},
1468 	{0x0000007a, 0x21000409},
1469 	{0x0000007c, 0x00000000},
1470 	{0x0000007d, 0xe8000000},
1471 	{0x0000007e, 0x044408a8},
1472 	{0x0000007f, 0x00000003},
1473 	{0x00000080, 0x00000000},
1474 	{0x00000081, 0x01000000},
1475 	{0x00000082, 0x02000000},
1476 	{0x00000083, 0x00000000},
1477 	{0x00000084, 0xe3f3e4f4},
1478 	{0x00000085, 0x00052024},
1479 	{0x00000087, 0x00000000},
1480 	{0x00000088, 0x66036603},
1481 	{0x00000089, 0x01000000},
1482 	{0x0000008b, 0x1c0a0000},
1483 	{0x0000008c, 0xff010000},
1484 	{0x0000008e, 0xffffefff},
1485 	{0x0000008f, 0xfff3efff},
1486 	{0x00000090, 0xfff3efbf},
1487 	{0x00000094, 0x00101101},
1488 	{0x00000095, 0x00000fff},
1489 	{0x00000096, 0x00116fff},
1490 	{0x00000097, 0x60010000},
1491 	{0x00000098, 0x10010000},
1492 	{0x00000099, 0x00006000},
1493 	{0x0000009a, 0x00001000},
1494 	{0x0000009f, 0x00a17730}
1495 };
1496 
1497 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1498 	{0x0000006f, 0x03044000},
1499 	{0x00000070, 0x0480c018},
1500 	{0x00000071, 0x00000040},
1501 	{0x00000072, 0x01000000},
1502 	{0x00000074, 0x000000ff},
1503 	{0x00000075, 0x00143400},
1504 	{0x00000076, 0x08ec0800},
1505 	{0x00000077, 0x040000cc},
1506 	{0x00000079, 0x00000000},
1507 	{0x0000007a, 0x21000409},
1508 	{0x0000007c, 0x00000000},
1509 	{0x0000007d, 0xe8000000},
1510 	{0x0000007e, 0x044408a8},
1511 	{0x0000007f, 0x00000003},
1512 	{0x00000080, 0x00000000},
1513 	{0x00000081, 0x01000000},
1514 	{0x00000082, 0x02000000},
1515 	{0x00000083, 0x00000000},
1516 	{0x00000084, 0xe3f3e4f4},
1517 	{0x00000085, 0x00052024},
1518 	{0x00000087, 0x00000000},
1519 	{0x00000088, 0x66036603},
1520 	{0x00000089, 0x01000000},
1521 	{0x0000008b, 0x1c0a0000},
1522 	{0x0000008c, 0xff010000},
1523 	{0x0000008e, 0xffffefff},
1524 	{0x0000008f, 0xfff3efff},
1525 	{0x00000090, 0xfff3efbf},
1526 	{0x00000094, 0x00101101},
1527 	{0x00000095, 0x00000fff},
1528 	{0x00000096, 0x00116fff},
1529 	{0x00000097, 0x60010000},
1530 	{0x00000098, 0x10010000},
1531 	{0x00000099, 0x00006000},
1532 	{0x0000009a, 0x00001000},
1533 	{0x0000009f, 0x00a07730}
1534 };
1535 
1536 /* ucode loading */
1537 int si_mc_load_microcode(struct radeon_device *rdev)
1538 {
1539 	const __be32 *fw_data = NULL;
1540 	const __le32 *new_fw_data = NULL;
1541 	u32 running;
1542 	u32 *io_mc_regs = NULL;
1543 	const __le32 *new_io_mc_regs = NULL;
1544 	int i, regs_size, ucode_size;
1545 
1546 	if (!rdev->mc_fw)
1547 		return -EINVAL;
1548 
1549 	if (rdev->new_fw) {
1550 		const struct mc_firmware_header_v1_0 *hdr =
1551 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1552 
1553 		radeon_ucode_print_mc_hdr(&hdr->header);
1554 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1555 		new_io_mc_regs = (const __le32 *)
1556 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1557 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1558 		new_fw_data = (const __le32 *)
1559 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1560 	} else {
1561 		ucode_size = rdev->mc_fw->datasize / 4;
1562 
1563 		switch (rdev->family) {
1564 		case CHIP_TAHITI:
1565 			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1566 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1567 			break;
1568 		case CHIP_PITCAIRN:
1569 			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1570 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1571 			break;
1572 		case CHIP_VERDE:
1573 		default:
1574 			io_mc_regs = (u32 *)&verde_io_mc_regs;
1575 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1576 			break;
1577 		case CHIP_OLAND:
1578 			io_mc_regs = (u32 *)&oland_io_mc_regs;
1579 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1580 			break;
1581 		case CHIP_HAINAN:
1582 			io_mc_regs = (u32 *)&hainan_io_mc_regs;
1583 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1584 			break;
1585 		}
1586 		fw_data = (const __be32 *)rdev->mc_fw->data;
1587 	}
1588 
1589 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1590 
1591 	if (running == 0) {
1592 		/* reset the engine and set to writable */
1593 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1594 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1595 
1596 		/* load mc io regs */
1597 		for (i = 0; i < regs_size; i++) {
1598 			if (rdev->new_fw) {
1599 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1600 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1601 			} else {
1602 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1603 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1604 			}
1605 		}
1606 		/* load the MC ucode */
1607 		for (i = 0; i < ucode_size; i++) {
1608 			if (rdev->new_fw)
1609 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1610 			else
1611 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1612 		}
1613 
1614 		/* put the engine back into the active state */
1615 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1616 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1617 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1618 
1619 		/* wait for training to complete */
1620 		for (i = 0; i < rdev->usec_timeout; i++) {
1621 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1622 				break;
1623 			udelay(1);
1624 		}
1625 		for (i = 0; i < rdev->usec_timeout; i++) {
1626 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1627 				break;
1628 			udelay(1);
1629 		}
1630 	}
1631 
1632 	return 0;
1633 }
1634 
1635 static int si_init_microcode(struct radeon_device *rdev)
1636 {
1637 	const char *chip_name;
1638 	const char *new_chip_name;
1639 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1640 	size_t smc_req_size, mc2_req_size;
1641 	char fw_name[30];
1642 	int err;
1643 	int new_fw = 0;
1644 	bool new_smc = false;
1645 	bool si58_fw = false;
1646 	bool banks2_fw = false;
1647 
1648 	DRM_DEBUG("\n");
1649 
1650 	switch (rdev->family) {
1651 	case CHIP_TAHITI:
1652 		chip_name = "TAHITI";
1653 		new_chip_name = "tahiti";
1654 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1655 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1656 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1657 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1658 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1659 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1660 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1661 		break;
1662 	case CHIP_PITCAIRN:
1663 		chip_name = "PITCAIRN";
1664 		if ((rdev->pdev->revision == 0x81) &&
1665 		    ((rdev->pdev->device == 0x6810) ||
1666 		     (rdev->pdev->device == 0x6811)))
1667 			new_smc = true;
1668 		new_chip_name = "pitcairn";
1669 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1670 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1671 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1672 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1673 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1674 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1675 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1676 		break;
1677 	case CHIP_VERDE:
1678 		chip_name = "VERDE";
1679 		if (((rdev->pdev->device == 0x6820) &&
1680 		     ((rdev->pdev->revision == 0x81) ||
1681 		      (rdev->pdev->revision == 0x83))) ||
1682 		    ((rdev->pdev->device == 0x6821) &&
1683 		     ((rdev->pdev->revision == 0x83) ||
1684 		      (rdev->pdev->revision == 0x87))) ||
1685 		    ((rdev->pdev->revision == 0x87) &&
1686 		     ((rdev->pdev->device == 0x6823) ||
1687 		      (rdev->pdev->device == 0x682b))))
1688 			new_smc = true;
1689 		new_chip_name = "verde";
1690 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1691 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1692 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1693 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1694 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1695 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1696 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1697 		break;
1698 	case CHIP_OLAND:
1699 		chip_name = "OLAND";
1700 		if (((rdev->pdev->revision == 0x81) &&
1701 		     ((rdev->pdev->device == 0x6600) ||
1702 		      (rdev->pdev->device == 0x6604) ||
1703 		      (rdev->pdev->device == 0x6605) ||
1704 		      (rdev->pdev->device == 0x6610))) ||
1705 		    ((rdev->pdev->revision == 0x83) &&
1706 		     (rdev->pdev->device == 0x6610)))
1707 			new_smc = true;
1708 		new_chip_name = "oland";
1709 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1710 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1711 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1712 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1713 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1714 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1715 		break;
1716 	case CHIP_HAINAN:
1717 		chip_name = "HAINAN";
1718 		if (((rdev->pdev->revision == 0x81) &&
1719 		     (rdev->pdev->device == 0x6660)) ||
1720 		    ((rdev->pdev->revision == 0x83) &&
1721 		     ((rdev->pdev->device == 0x6660) ||
1722 		      (rdev->pdev->device == 0x6663) ||
1723 		      (rdev->pdev->device == 0x6665) ||
1724 		      (rdev->pdev->device == 0x6667))))
1725 			new_smc = true;
1726 		else if ((rdev->pdev->revision == 0xc3) &&
1727 			 (rdev->pdev->device == 0x6665))
1728 			banks2_fw = true;
1729 		new_chip_name = "hainan";
1730 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1731 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1732 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1733 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1734 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1735 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1736 		break;
1737 	default: BUG();
1738 	}
1739 
1740 	/* this memory configuration requires special MC firmware */
1741 	if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1742 		si58_fw = true;
1743 
1744 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1745 
1746 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
1747 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1748 	if (err) {
1749 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1750 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1751 		if (err)
1752 			goto out;
1753 		if (rdev->pfp_fw->datasize != pfp_req_size) {
1754 			printk(KERN_ERR
1755 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1756 			       rdev->pfp_fw->datasize, fw_name);
1757 			err = -EINVAL;
1758 			goto out;
1759 		}
1760 	} else {
1761 		err = radeon_ucode_validate(rdev->pfp_fw);
1762 		if (err) {
1763 			printk(KERN_ERR
1764 			       "si_cp: validation failed for firmware \"%s\"\n",
1765 			       fw_name);
1766 			goto out;
1767 		} else {
1768 			new_fw++;
1769 		}
1770 	}
1771 
1772 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
1773 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1774 	if (err) {
1775 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
1776 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1777 		if (err)
1778 			goto out;
1779 		if (rdev->me_fw->datasize != me_req_size) {
1780 			printk(KERN_ERR
1781 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1782 			       rdev->me_fw->datasize, fw_name);
1783 			err = -EINVAL;
1784 		}
1785 	} else {
1786 		err = radeon_ucode_validate(rdev->me_fw);
1787 		if (err) {
1788 			printk(KERN_ERR
1789 			       "si_cp: validation failed for firmware \"%s\"\n",
1790 			       fw_name);
1791 			goto out;
1792 		} else {
1793 			new_fw++;
1794 		}
1795 	}
1796 
1797 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
1798 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1799 	if (err) {
1800 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
1801 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1802 		if (err)
1803 			goto out;
1804 		if (rdev->ce_fw->datasize != ce_req_size) {
1805 			printk(KERN_ERR
1806 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1807 			       rdev->ce_fw->datasize, fw_name);
1808 			err = -EINVAL;
1809 		}
1810 	} else {
1811 		err = radeon_ucode_validate(rdev->ce_fw);
1812 		if (err) {
1813 			printk(KERN_ERR
1814 			       "si_cp: validation failed for firmware \"%s\"\n",
1815 			       fw_name);
1816 			goto out;
1817 		} else {
1818 			new_fw++;
1819 		}
1820 	}
1821 
1822 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
1823 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1824 	if (err) {
1825 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
1826 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1827 		if (err)
1828 			goto out;
1829 		if (rdev->rlc_fw->datasize != rlc_req_size) {
1830 			printk(KERN_ERR
1831 			       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1832 			       rdev->rlc_fw->datasize, fw_name);
1833 			err = -EINVAL;
1834 		}
1835 	} else {
1836 		err = radeon_ucode_validate(rdev->rlc_fw);
1837 		if (err) {
1838 			printk(KERN_ERR
1839 			       "si_cp: validation failed for firmware \"%s\"\n",
1840 			       fw_name);
1841 			goto out;
1842 		} else {
1843 			new_fw++;
1844 		}
1845 	}
1846 
1847 	if (si58_fw)
1848 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_si58_mc");
1849 	else
1850 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
1851 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1852 	if (err) {
1853 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
1854 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1855 		if (err) {
1856 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
1857 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1858 			if (err)
1859 				goto out;
1860 		}
1861 		if ((rdev->mc_fw->datasize != mc_req_size) &&
1862 		    (rdev->mc_fw->datasize != mc2_req_size)) {
1863 			printk(KERN_ERR
1864 			       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1865 			       rdev->mc_fw->datasize, fw_name);
1866 			err = -EINVAL;
1867 		}
1868 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
1869 	} else {
1870 		err = radeon_ucode_validate(rdev->mc_fw);
1871 		if (err) {
1872 			printk(KERN_ERR
1873 			       "si_cp: validation failed for firmware \"%s\"\n",
1874 			       fw_name);
1875 			goto out;
1876 		} else {
1877 			new_fw++;
1878 		}
1879 	}
1880 
1881 	if (banks2_fw)
1882 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_banks_k_2_smc");
1883 	else if (new_smc)
1884 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_k_smc", new_chip_name);
1885 	else
1886 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
1887 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1888 	if (err) {
1889 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
1890 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1891 		if (err) {
1892 			printk(KERN_ERR
1893 			       "smc: error loading firmware \"%s\"\n",
1894 			       fw_name);
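			/*
			 * SMC firmware is optional: drop the reference and
			 * clear err so init continues, presumably without
			 * the power management features it provides.
			 */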
1895 			release_firmware(rdev->smc_fw);
1896 			rdev->smc_fw = NULL;
1897 			err = 0;
1898 		} else if (rdev->smc_fw->datasize != smc_req_size) {
1899 			printk(KERN_ERR
1900 			       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1901 			       rdev->smc_fw->datasize, fw_name);
1902 			err = -EINVAL;
1903 		}
1904 	} else {
1905 		err = radeon_ucode_validate(rdev->smc_fw);
1906 		if (err) {
1907 			printk(KERN_ERR
1908 			       "si_cp: validation failed for firmware \"%s\"\n",
1909 			       fw_name);
1910 			goto out;
1911 		} else {
1912 			new_fw++;
1913 		}
1914 	}
1915 
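	/*
	 * Six images were requested above (pfp, me, ce, rlc, mc, smc);
	 * new_fw counts how many validated with new-style headers, and
	 * mixing new- and old-format images is rejected outright.
	 */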
1916 	if (new_fw == 0) {
1917 		rdev->new_fw = false;
1918 	} else if (new_fw < 6) {
1919 		printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1920 		err = -EINVAL;
1921 	} else {
1922 		rdev->new_fw = true;
1923 	}
1924 out:
1925 	if (err) {
1926 		if (err != -EINVAL)
1927 			printk(KERN_ERR
1928 			       "si_cp: Failed to load firmware \"%s\"\n",
1929 			       fw_name);
1930 		release_firmware(rdev->pfp_fw);
1931 		rdev->pfp_fw = NULL;
1932 		release_firmware(rdev->me_fw);
1933 		rdev->me_fw = NULL;
1934 		release_firmware(rdev->ce_fw);
1935 		rdev->ce_fw = NULL;
1936 		release_firmware(rdev->rlc_fw);
1937 		rdev->rlc_fw = NULL;
1938 		release_firmware(rdev->mc_fw);
1939 		rdev->mc_fw = NULL;
1940 		release_firmware(rdev->smc_fw);
1941 		rdev->smc_fw = NULL;
1942 	}
1943 	return err;
1944 }
1945 
1946 /**
1947  * si_fini_microcode - drop the firmware image references
1948  *
1949  * @rdev: radeon_device pointer
1950  *
1951  * Drop the pfp, me, rlc, mc, smc and ce firmware image references.
1952  * Called at driver shutdown.
1953  */
1954 static void si_fini_microcode(struct radeon_device *rdev)
1955 {
1956 	release_firmware(rdev->pfp_fw);
1957 	rdev->pfp_fw = NULL;
1958 	release_firmware(rdev->me_fw);
1959 	rdev->me_fw = NULL;
1960 	release_firmware(rdev->rlc_fw);
1961 	rdev->rlc_fw = NULL;
1962 	release_firmware(rdev->mc_fw);
1963 	rdev->mc_fw = NULL;
1964 	release_firmware(rdev->smc_fw);
1965 	rdev->smc_fw = NULL;
1966 	release_firmware(rdev->ce_fw);
1967 	rdev->ce_fw = NULL;
1968 }
1969 
1970 /* watermark setup */
1971 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1972 				   struct radeon_crtc *radeon_crtc,
1973 				   struct drm_display_mode *mode,
1974 				   struct drm_display_mode *other_mode)
1975 {
1976 	u32 tmp, buffer_alloc, i;
1977 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1978 	/*
1979 	 * Line Buffer Setup
1980 	 * There are 3 line buffers, each one shared by 2 display controllers.
1981 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1982 	 * the display controllers.  The partitioning is done via one of four
1983 	 * preset allocations specified in bits 21:20 (only two are used here):
1984 	 *  0 - half lb
1985 	 *  2 - whole lb, other crtc must be disabled
1986 	 */
1987 	/* this can get tricky if we have two large displays on a paired group
1988 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1989 	 * non-linked crtcs for maximum line buffer allocation.
1990 	 */
1991 	if (radeon_crtc->base.enabled && mode) {
1992 		if (other_mode) {
1993 			tmp = 0; /* 1/2 */
1994 			buffer_alloc = 1;
1995 		} else {
1996 			tmp = 2; /* whole */
1997 			buffer_alloc = 2;
1998 		}
1999 	} else {
2000 		tmp = 0;
2001 		buffer_alloc = 0;
2002 	}
2003 
2004 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
2005 	       DC_LB_MEMORY_CONFIG(tmp));
2006 
2007 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
2008 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
2009 	for (i = 0; i < rdev->usec_timeout; i++) {
2010 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
2011 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
2012 			break;
2013 		udelay(1);
2014 	}
2015 
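	/*
	 * Return the line buffer allocation so the caller can pass it to
	 * dce6_program_watermarks() as lb_size: 4096 * 2 for a half line
	 * buffer, 8192 * 2 when this pipe gets the whole buffer.
	 */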
2016 	if (radeon_crtc->base.enabled && mode) {
2017 		switch (tmp) {
2018 		case 0:
2019 		default:
2020 			return 4096 * 2;
2021 		case 2:
2022 			return 8192 * 2;
2023 		}
2024 	}
2025 
2026 	/* controller not enabled, so no lb used */
2027 	return 0;
2028 }
2029 
2030 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2031 {
2032 	u32 tmp = RREG32(MC_SHARED_CHMAP);
2033 
2034 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2035 	case 0:
2036 	default:
2037 		return 1;
2038 	case 1:
2039 		return 2;
2040 	case 2:
2041 		return 4;
2042 	case 3:
2043 		return 8;
2044 	case 4:
2045 		return 3;
2046 	case 5:
2047 		return 6;
2048 	case 6:
2049 		return 10;
2050 	case 7:
2051 		return 12;
2052 	case 8:
2053 		return 16;
2054 	}
2055 }
2056 
2057 struct dce6_wm_params {
2058 	u32 dram_channels; /* number of dram channels */
2059 	u32 yclk;          /* bandwidth per dram data pin in kHz */
2060 	u32 sclk;          /* engine clock in kHz */
2061 	u32 disp_clk;      /* display clock in kHz */
2062 	u32 src_width;     /* viewport width */
2063 	u32 active_time;   /* active display time in ns */
2064 	u32 blank_time;    /* blank time in ns */
2065 	bool interlaced;    /* mode is interlaced */
2066 	fixed20_12 vsc;    /* vertical scale ratio */
2067 	u32 num_heads;     /* number of active crtcs */
2068 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2069 	u32 lb_size;       /* line buffer allocated to pipe */
2070 	u32 vtaps;         /* vertical scaler taps */
2071 };
2072 
2073 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2074 {
2075 	/* Calculate raw DRAM Bandwidth */
2076 	fixed20_12 dram_efficiency; /* 0.7 */
2077 	fixed20_12 yclk, dram_channels, bandwidth;
2078 	fixed20_12 a;
2079 
2080 	a.full = dfixed_const(1000);
2081 	yclk.full = dfixed_const(wm->yclk);
2082 	yclk.full = dfixed_div(yclk, a);
2083 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2084 	a.full = dfixed_const(10);
2085 	dram_efficiency.full = dfixed_const(7);
2086 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2087 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2088 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2089 
2090 	return dfixed_trunc(bandwidth);
2091 }
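/*
 * For example (illustrative numbers only, assuming 32-bit channels):
 * yclk = 1000000 (1 GHz effective, in kHz) and 2 DRAM channels give
 * (1000000 / 1000) * (2 * 4) * 0.7 = 5600, i.e.
 * bandwidth = yclk(MHz) * channels * 4 bytes * 0.7 efficiency.
 */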
2092 
2093 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2094 {
2095 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2096 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2097 	fixed20_12 yclk, dram_channels, bandwidth;
2098 	fixed20_12 a;
2099 
2100 	a.full = dfixed_const(1000);
2101 	yclk.full = dfixed_const(wm->yclk);
2102 	yclk.full = dfixed_div(yclk, a);
2103 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2104 	a.full = dfixed_const(10);
2105 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
2106 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2107 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2108 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2109 
2110 	return dfixed_trunc(bandwidth);
2111 }
2112 
2113 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2114 {
2115 	/* Calculate the display Data return Bandwidth */
2116 	fixed20_12 return_efficiency; /* 0.8 */
2117 	fixed20_12 sclk, bandwidth;
2118 	fixed20_12 a;
2119 
2120 	a.full = dfixed_const(1000);
2121 	sclk.full = dfixed_const(wm->sclk);
2122 	sclk.full = dfixed_div(sclk, a);
2123 	a.full = dfixed_const(10);
2124 	return_efficiency.full = dfixed_const(8);
2125 	return_efficiency.full = dfixed_div(return_efficiency, a);
2126 	a.full = dfixed_const(32);
2127 	bandwidth.full = dfixed_mul(a, sclk);
2128 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2129 
2130 	return dfixed_trunc(bandwidth);
2131 }
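/*
 * For example (illustrative numbers only): sclk = 800000 (800 MHz, in
 * kHz) gives a return path of (800000 / 1000) * 32 * 0.8 = 20480, i.e.
 * 32 bytes per engine clock derated by the 0.8 return efficiency.
 */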
2132 
2133 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2134 {
2135 	return 32;
2136 }
2137 
2138 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2139 {
2140 	/* Calculate the DMIF Request Bandwidth */
2141 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2142 	fixed20_12 disp_clk, sclk, bandwidth;
2143 	fixed20_12 a, b1, b2;
2144 	u32 min_bandwidth;
2145 
2146 	a.full = dfixed_const(1000);
2147 	disp_clk.full = dfixed_const(wm->disp_clk);
2148 	disp_clk.full = dfixed_div(disp_clk, a);
2149 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2150 	b1.full = dfixed_mul(a, disp_clk);
2151 
2152 	a.full = dfixed_const(1000);
2153 	sclk.full = dfixed_const(wm->sclk);
2154 	sclk.full = dfixed_div(sclk, a);
2155 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2156 	b2.full = dfixed_mul(a, sclk);
2157 
2158 	a.full = dfixed_const(10);
2159 	disp_clk_request_efficiency.full = dfixed_const(8);
2160 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2161 
2162 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2163 
2164 	a.full = dfixed_const(min_bandwidth);
2165 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2166 
2167 	return dfixed_trunc(bandwidth);
2168 }
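/*
 * The DMIF can issue half a request (16 bytes) per display clock or a
 * full 32-byte request per engine clock, so the result above is
 * min(16 * disp_clk, 32 * sclk) * 0.8, with the clocks in MHz.
 */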
2169 
2170 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2171 {
2172 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
2173 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2174 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2175 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2176 
2177 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2178 }
2179 
2180 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2181 {
2182 	/* Calculate the display mode Average Bandwidth
2183 	 * DisplayMode should contain the source and destination dimensions,
2184 	 * timing, etc.
2185 	 */
2186 	fixed20_12 bpp;
2187 	fixed20_12 line_time;
2188 	fixed20_12 src_width;
2189 	fixed20_12 bandwidth;
2190 	fixed20_12 a;
2191 
2192 	a.full = dfixed_const(1000);
2193 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2194 	line_time.full = dfixed_div(line_time, a);
2195 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2196 	src_width.full = dfixed_const(wm->src_width);
2197 	bandwidth.full = dfixed_mul(src_width, bpp);
2198 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2199 	bandwidth.full = dfixed_div(bandwidth, line_time);
2200 
2201 	return dfixed_trunc(bandwidth);
2202 }
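/*
 * With line_time converted to microseconds, this works out to
 * src_width * bytes_per_pixel * vsc / line_time, i.e. the bytes a head
 * must fetch per scanline divided by the time one scanline takes.
 */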
2203 
2204 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2205 {
2206 	/* First calculate the latency in ns */
2207 	u32 mc_latency = 2000; /* 2000 ns. */
2208 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2209 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2210 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2211 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2212 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2213 		(wm->num_heads * cursor_line_pair_return_time);
2214 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2215 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2216 	u32 tmp, dmif_size = 12288;
2217 	fixed20_12 a, b, c;
2218 
2219 	if (wm->num_heads == 0)
2220 		return 0;
2221 
2222 	a.full = dfixed_const(2);
2223 	b.full = dfixed_const(1);
2224 	if ((wm->vsc.full > a.full) ||
2225 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2226 	    (wm->vtaps >= 5) ||
2227 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2228 		max_src_lines_per_dst_line = 4;
2229 	else
2230 		max_src_lines_per_dst_line = 2;
2231 
2232 	a.full = dfixed_const(available_bandwidth);
2233 	b.full = dfixed_const(wm->num_heads);
2234 	a.full = dfixed_div(a, b);
2235 
2236 	b.full = dfixed_const(mc_latency + 512);
2237 	c.full = dfixed_const(wm->disp_clk);
2238 	b.full = dfixed_div(b, c);
2239 
2240 	c.full = dfixed_const(dmif_size);
2241 	b.full = dfixed_div(c, b);
2242 
2243 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
2244 
2245 	b.full = dfixed_const(1000);
2246 	c.full = dfixed_const(wm->disp_clk);
2247 	b.full = dfixed_div(c, b);
2248 	c.full = dfixed_const(wm->bytes_per_pixel);
2249 	b.full = dfixed_mul(b, c);
2250 
2251 	lb_fill_bw = min(tmp, dfixed_trunc(b));
2252 
2253 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2254 	b.full = dfixed_const(1000);
2255 	c.full = dfixed_const(lb_fill_bw);
2256 	b.full = dfixed_div(c, b);
2257 	a.full = dfixed_div(a, b);
2258 	line_fill_time = dfixed_trunc(a);
2259 
2260 	if (line_fill_time < wm->active_time)
2261 		return latency;
2262 	else
2263 		return latency + (line_fill_time - wm->active_time);
2264 
2265 }
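/*
 * The watermark above is the worst-case latency (in ns) a head may
 * see: a fixed 2 us MC latency, queueing behind the other heads' chunk
 * and cursor returns, and the dc pipe latency; if the line buffer
 * cannot be refilled within the active period, the shortfall is added
 * on top.
 */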
2266 
2267 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2268 {
2269 	if (dce6_average_bandwidth(wm) <=
2270 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2271 		return true;
2272 	else
2273 		return false;
2274 }
2275 
2276 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2277 {
2278 	if (dce6_average_bandwidth(wm) <=
2279 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2280 		return true;
2281 	else
2282 		return false;
2283 }
2284 
2285 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2286 {
2287 	u32 lb_partitions = wm->lb_size / wm->src_width;
2288 	u32 line_time = wm->active_time + wm->blank_time;
2289 	u32 latency_tolerant_lines;
2290 	u32 latency_hiding;
2291 	fixed20_12 a;
2292 
2293 	a.full = dfixed_const(1);
2294 	if (wm->vsc.full > a.full)
2295 		latency_tolerant_lines = 1;
2296 	else {
2297 		if (lb_partitions <= (wm->vtaps + 1))
2298 			latency_tolerant_lines = 1;
2299 		else
2300 			latency_tolerant_lines = 2;
2301 	}
2302 
2303 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2304 
2305 	if (dce6_latency_watermark(wm) <= latency_hiding)
2306 		return true;
2307 	else
2308 		return false;
2309 }
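/*
 * A mode latency-hides if the watermark fits within the lines the line
 * buffer can hold ahead of the scanout (one or two line times plus the
 * blank period, depending on scaling ratio and tap count).
 */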
2310 
2311 static void dce6_program_watermarks(struct radeon_device *rdev,
2312 					 struct radeon_crtc *radeon_crtc,
2313 					 u32 lb_size, u32 num_heads)
2314 {
2315 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2316 	struct dce6_wm_params wm_low, wm_high;
2317 	u32 dram_channels;
2318 	u32 pixel_period;
2319 	u32 line_time = 0;
2320 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2321 	u32 priority_a_mark = 0, priority_b_mark = 0;
2322 	u32 priority_a_cnt = PRIORITY_OFF;
2323 	u32 priority_b_cnt = PRIORITY_OFF;
2324 	u32 tmp, arb_control3;
2325 	fixed20_12 a, b, c;
2326 
2327 	if (radeon_crtc->base.enabled && num_heads && mode) {
2328 		pixel_period = 1000000 / (u32)mode->clock;
2329 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2330 		priority_a_cnt = 0;
2331 		priority_b_cnt = 0;
2332 
2333 		if (rdev->family == CHIP_ARUBA)
2334 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2335 		else
2336 			dram_channels = si_get_number_of_dram_channels(rdev);
2337 
2338 		/* watermark for high clocks */
2339 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2340 			wm_high.yclk =
2341 				radeon_dpm_get_mclk(rdev, false) * 10;
2342 			wm_high.sclk =
2343 				radeon_dpm_get_sclk(rdev, false) * 10;
2344 		} else {
2345 			wm_high.yclk = rdev->pm.current_mclk * 10;
2346 			wm_high.sclk = rdev->pm.current_sclk * 10;
2347 		}
2348 
2349 		wm_high.disp_clk = mode->clock;
2350 		wm_high.src_width = mode->crtc_hdisplay;
2351 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2352 		wm_high.blank_time = line_time - wm_high.active_time;
2353 		wm_high.interlaced = false;
2354 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2355 			wm_high.interlaced = true;
2356 		wm_high.vsc = radeon_crtc->vsc;
2357 		wm_high.vtaps = 1;
2358 		if (radeon_crtc->rmx_type != RMX_OFF)
2359 			wm_high.vtaps = 2;
2360 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2361 		wm_high.lb_size = lb_size;
2362 		wm_high.dram_channels = dram_channels;
2363 		wm_high.num_heads = num_heads;
2364 
2365 		/* watermark for low clocks */
2366 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2367 			wm_low.yclk =
2368 				radeon_dpm_get_mclk(rdev, true) * 10;
2369 			wm_low.sclk =
2370 				radeon_dpm_get_sclk(rdev, true) * 10;
2371 		} else {
2372 			wm_low.yclk = rdev->pm.current_mclk * 10;
2373 			wm_low.sclk = rdev->pm.current_sclk * 10;
2374 		}
2375 
2376 		wm_low.disp_clk = mode->clock;
2377 		wm_low.src_width = mode->crtc_hdisplay;
2378 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2379 		wm_low.blank_time = line_time - wm_low.active_time;
2380 		wm_low.interlaced = false;
2381 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2382 			wm_low.interlaced = true;
2383 		wm_low.vsc = radeon_crtc->vsc;
2384 		wm_low.vtaps = 1;
2385 		if (radeon_crtc->rmx_type != RMX_OFF)
2386 			wm_low.vtaps = 2;
2387 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2388 		wm_low.lb_size = lb_size;
2389 		wm_low.dram_channels = dram_channels;
2390 		wm_low.num_heads = num_heads;
2391 
2392 		/* set for high clocks */
2393 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2394 		/* set for low clocks */
2395 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2396 
2397 		/* possibly force display priority to high */
2398 		/* should really do this at mode validation time... */
2399 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2400 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2401 		    !dce6_check_latency_hiding(&wm_high) ||
2402 		    (rdev->disp_priority == 2)) {
2403 			DRM_DEBUG_KMS("force priority to high\n");
2404 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2405 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2406 		}
2407 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2408 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2409 		    !dce6_check_latency_hiding(&wm_low) ||
2410 		    (rdev->disp_priority == 2)) {
2411 			DRM_DEBUG_KMS("force priority to high\n");
2412 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2413 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2414 		}
2415 
2416 		a.full = dfixed_const(1000);
2417 		b.full = dfixed_const(mode->clock);
2418 		b.full = dfixed_div(b, a);
2419 		c.full = dfixed_const(latency_watermark_a);
2420 		c.full = dfixed_mul(c, b);
2421 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2422 		c.full = dfixed_div(c, a);
2423 		a.full = dfixed_const(16);
2424 		c.full = dfixed_div(c, a);
2425 		priority_a_mark = dfixed_trunc(c);
2426 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2427 
2428 		a.full = dfixed_const(1000);
2429 		b.full = dfixed_const(mode->clock);
2430 		b.full = dfixed_div(b, a);
2431 		c.full = dfixed_const(latency_watermark_b);
2432 		c.full = dfixed_mul(c, b);
2433 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2434 		c.full = dfixed_div(c, a);
2435 		a.full = dfixed_const(16);
2436 		c.full = dfixed_div(c, a);
2437 		priority_b_mark = dfixed_trunc(c);
2438 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2439 
2440 		/* save the number of lines the line buffer leads the scanout by */
2441 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2442 	}
2443 
2444 	/* select wm A */
2445 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2446 	tmp = arb_control3;
2447 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2448 	tmp |= LATENCY_WATERMARK_MASK(1);
2449 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2450 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2451 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2452 		LATENCY_HIGH_WATERMARK(line_time)));
2453 	/* select wm B */
2454 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2455 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2456 	tmp |= LATENCY_WATERMARK_MASK(2);
2457 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2458 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2459 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2460 		LATENCY_HIGH_WATERMARK(line_time)));
2461 	/* restore original selection */
2462 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2463 
2464 	/* write the priority marks */
2465 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2466 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2467 
2468 	/* save values for DPM */
2469 	radeon_crtc->line_time = line_time;
2470 	radeon_crtc->wm_high = latency_watermark_a;
2471 	radeon_crtc->wm_low = latency_watermark_b;
2472 }
2473 
2474 void dce6_bandwidth_update(struct radeon_device *rdev)
2475 {
2476 	struct drm_display_mode *mode0 = NULL;
2477 	struct drm_display_mode *mode1 = NULL;
2478 	u32 num_heads = 0, lb_size;
2479 	int i;
2480 
2481 	if (!rdev->mode_info.mode_config_initialized)
2482 		return;
2483 
2484 	radeon_update_display_priority(rdev);
2485 
2486 	for (i = 0; i < rdev->num_crtc; i++) {
2487 		if (rdev->mode_info.crtcs[i]->base.enabled)
2488 			num_heads++;
2489 	}
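	/*
	 * Program watermarks in crtc pairs: paired controllers share a
	 * line buffer (see dce6_line_buffer_adjust()), so each pipe's
	 * allocation depends on whether its partner is active.
	 */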
2490 	for (i = 0; i < rdev->num_crtc; i += 2) {
2491 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2492 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2493 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2494 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2495 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2496 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2497 	}
2498 }
2499 
2500 /*
2501  * Core functions
2502  */
2503 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2504 {
2505 	u32 *tile = rdev->config.si.tile_mode_array;
2506 	const u32 num_tile_mode_states =
2507 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2508 	u32 reg_offset, split_equal_to_row_size;
2509 
2510 	switch (rdev->config.si.mem_row_size_in_kb) {
2511 	case 1:
2512 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2513 		break;
2514 	case 2:
2515 	default:
2516 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2517 		break;
2518 	case 4:
2519 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2520 		break;
2521 	}
2522 
2523 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2524 		tile[reg_offset] = 0;
2525 
2526 	switch (rdev->family) {
2527 	case CHIP_TAHITI:
2528 	case CHIP_PITCAIRN:
2529 		/* non-AA compressed depth or any compressed stencil */
2530 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2531 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2532 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2533 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2534 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2535 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2536 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2537 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2538 		/* 2xAA/4xAA compressed depth only */
2539 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2540 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2541 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2542 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2543 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2544 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2546 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2547 		/* 8xAA compressed depth only */
2548 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2550 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2551 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2552 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2553 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2555 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2556 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2557 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2558 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2559 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2560 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2561 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2562 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2563 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2564 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2565 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2566 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2567 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2568 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2569 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2570 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2571 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2572 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2573 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2574 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2575 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2577 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2578 			   TILE_SPLIT(split_equal_to_row_size) |
2579 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2580 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2581 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2582 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2583 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2584 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2586 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2587 			   TILE_SPLIT(split_equal_to_row_size) |
2588 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2589 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2591 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2592 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2593 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2595 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2596 			   TILE_SPLIT(split_equal_to_row_size) |
2597 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2598 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2600 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2601 		/* 1D and 1D Array Surfaces */
2602 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2603 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2604 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2605 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2606 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2607 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2609 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2610 		/* Displayable maps. */
2611 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2612 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2613 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2614 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2615 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2616 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2618 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2619 		/* Display 8bpp. */
2620 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2621 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2622 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2623 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2624 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2625 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2626 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2627 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2628 		/* Display 16bpp. */
2629 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2630 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2631 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2632 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2633 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2634 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2636 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2637 		/* Display 32bpp. */
2638 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2639 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2640 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2641 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2642 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2643 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2646 		/* Thin. */
2647 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2648 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2649 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2650 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2651 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2652 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2653 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2654 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2655 		/* Thin 8 bpp. */
2656 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2658 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2659 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2660 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2661 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2662 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2663 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2664 		/* Thin 16 bpp. */
2665 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2667 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2668 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2669 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2670 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2672 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2673 		/* Thin 32 bpp. */
2674 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2676 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2677 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2678 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2679 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2682 		/* Thin 64 bpp. */
2683 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2684 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2685 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2686 			   TILE_SPLIT(split_equal_to_row_size) |
2687 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2688 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2689 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2690 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2691 		/* 8 bpp PRT. */
2692 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2693 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2694 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2695 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2696 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2697 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2698 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2699 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2700 		/* 16 bpp PRT */
2701 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2703 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2704 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2705 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2706 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2707 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2708 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2709 		/* 32 bpp PRT */
2710 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2711 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2712 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2713 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2714 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2715 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2716 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2717 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2718 		/* 64 bpp PRT */
2719 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2720 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2721 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2722 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2723 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2724 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2725 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2726 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2727 		/* 128 bpp PRT */
2728 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2729 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2730 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2731 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2732 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2733 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2734 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2735 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2736 
2737 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2738 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2739 		break;
2740 
2741 	case CHIP_VERDE:
2742 	case CHIP_OLAND:
2743 	case CHIP_HAINAN:
2744 		/* non-AA compressed depth or any compressed stencil */
2745 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2747 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2749 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2750 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2752 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2753 		/* 2xAA/4xAA compressed depth only */
2754 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2755 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2756 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2757 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2758 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2759 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2761 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2762 		/* 8xAA compressed depth only */
2763 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2764 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2765 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2766 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2767 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2768 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2770 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2771 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2772 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2773 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2774 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2775 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2776 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2777 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2778 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2779 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2780 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2781 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2783 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2784 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2785 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2786 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2788 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2789 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2790 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2792 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2793 			   TILE_SPLIT(split_equal_to_row_size) |
2794 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2795 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2796 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2797 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2798 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2799 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2800 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2801 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802 			   TILE_SPLIT(split_equal_to_row_size) |
2803 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2804 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2806 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2807 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2808 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2810 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2811 			   TILE_SPLIT(split_equal_to_row_size) |
2812 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2813 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2815 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2816 		/* 1D and 1D Array Surfaces */
2817 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2818 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2819 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2820 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2821 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2822 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2824 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2825 		/* Displayable maps. */
2826 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2827 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2828 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2829 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2830 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2831 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2833 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2834 		/* Display 8bpp. */
2835 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2836 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2837 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2838 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2839 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2840 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2842 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2843 		/* Display 16bpp. */
2844 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2845 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2846 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2847 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2848 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2849 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2851 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2852 		/* Display 32bpp. */
2853 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2854 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2855 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2856 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2857 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2858 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2859 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2860 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2861 		/* Thin. */
2862 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2863 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2864 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2865 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2866 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2867 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2868 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2869 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2870 		/* Thin 8 bpp. */
2871 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2873 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2874 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2875 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2876 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2878 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2879 		/* Thin 16 bpp. */
2880 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2882 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2883 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2884 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2885 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2886 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2887 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2888 		/* Thin 32 bpp. */
2889 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2891 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2892 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2893 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2894 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2895 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2896 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2897 		/* Thin 64 bpp. */
2898 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2899 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2900 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2901 			   TILE_SPLIT(split_equal_to_row_size) |
2902 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2903 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2904 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2905 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2906 		/* 8 bpp PRT. */
2907 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2908 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2909 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2910 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2911 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2912 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2913 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2914 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2915 		/* 16 bpp PRT */
2916 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2918 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2919 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2920 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2921 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2922 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2923 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2924 		/* 32 bpp PRT */
2925 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2926 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2927 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2928 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2929 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2930 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2931 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2932 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2933 		/* 64 bpp PRT */
2934 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2935 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2936 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2937 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2938 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2939 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2940 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2941 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2942 		/* 128 bpp PRT */
2943 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2944 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2945 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2946 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2947 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2948 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2949 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2950 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2951 
2952 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2953 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2954 		break;
2955 
2956 	default:
2957 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2958 	}
2959 }
2960 
2961 static void si_select_se_sh(struct radeon_device *rdev,
2962 			    u32 se_num, u32 sh_num)
2963 {
2964 	u32 data = INSTANCE_BROADCAST_WRITES;
2965 
2966 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2967 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2968 	else if (se_num == 0xffffffff)
2969 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2970 	else if (sh_num == 0xffffffff)
2971 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2972 	else
2973 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2974 	WREG32(GRBM_GFX_INDEX, data);
2975 }
2976 
2977 static u32 si_create_bitmask(u32 bit_width)
2978 {
2979 	u32 i, mask = 0;
2980 
2981 	for (i = 0; i < bit_width; i++) {
2982 		mask <<= 1;
2983 		mask |= 1;
2984 	}
2985 	return mask;
2986 }
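/*
 * Equivalent to (1 << bit_width) - 1 for widths below 32; e.g.
 * si_create_bitmask(5) == 0x1f.
 */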
2987 
2988 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2989 {
2990 	u32 data, mask;
2991 
2992 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2993 	if (data & 1)
2994 		data &= INACTIVE_CUS_MASK;
2995 	else
2996 		data = 0;
2997 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2998 
2999 	data >>= INACTIVE_CUS_SHIFT;
3000 
3001 	mask = si_create_bitmask(cu_per_sh);
3002 
3003 	return ~data & mask;
3004 }
3005 
3006 static void si_setup_spi(struct radeon_device *rdev,
3007 			 u32 se_num, u32 sh_per_se,
3008 			 u32 cu_per_sh)
3009 {
3010 	int i, j, k;
3011 	u32 data, mask, active_cu;
3012 
3013 	for (i = 0; i < se_num; i++) {
3014 		for (j = 0; j < sh_per_se; j++) {
3015 			si_select_se_sh(rdev, i, j);
3016 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
3017 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
3018 
3019 			/* check each of the 16 possible CUs in turn */
3020 			for (k = 0; k < 16; k++) {
3021 				mask = 1 << k;
3022 				if (active_cu & mask) {
3023 					data &= ~mask;
3024 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3025 					break;
3026 				}
3027 			}
3028 		}
3029 	}
3030 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3031 }
3032 
3033 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3034 			      u32 max_rb_num_per_se,
3035 			      u32 sh_per_se)
3036 {
3037 	u32 data, mask;
3038 
3039 	data = RREG32(CC_RB_BACKEND_DISABLE);
3040 	if (data & 1)
3041 		data &= BACKEND_DISABLE_MASK;
3042 	else
3043 		data = 0;
3044 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3045 
3046 	data >>= BACKEND_DISABLE_SHIFT;
3047 
3048 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3049 
3050 	return data & mask;
3051 }
3052 
3053 static void si_setup_rb(struct radeon_device *rdev,
3054 			u32 se_num, u32 sh_per_se,
3055 			u32 max_rb_num_per_se)
3056 {
3057 	int i, j;
3058 	u32 data, mask;
3059 	u32 disabled_rbs = 0;
3060 	u32 enabled_rbs = 0;
3061 
3062 	for (i = 0; i < se_num; i++) {
3063 		for (j = 0; j < sh_per_se; j++) {
3064 			si_select_se_sh(rdev, i, j);
3065 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3066 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3067 		}
3068 	}
3069 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3070 
3071 	mask = 1;
3072 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3073 		if (!(disabled_rbs & mask))
3074 			enabled_rbs |= mask;
3075 		mask <<= 1;
3076 	}
3077 
3078 	rdev->config.si.backend_enable_mask = enabled_rbs;
3079 
3080 	for (i = 0; i < se_num; i++) {
3081 		si_select_se_sh(rdev, i, 0xffffffff);
3082 		data = 0;
3083 		for (j = 0; j < sh_per_se; j++) {
3084 			switch (enabled_rbs & 3) {
3085 			case 1:
3086 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3087 				break;
3088 			case 2:
3089 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3090 				break;
3091 			case 3:
3092 			default:
3093 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3094 				break;
3095 			}
3096 			enabled_rbs >>= 2;
3097 		}
3098 		WREG32(PA_SC_RASTER_CONFIG, data);
3099 	}
3100 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3101 }
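/*
 * The computed RB mask is cached in rdev->config.si.backend_enable_mask
 * above, and PA_SC_RASTER_CONFIG gets a 2-bit RB_MAP code per shader
 * array, chosen from which of that array's two render backends are
 * enabled (enabled_rbs is consumed two bits at a time).
 */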
3102 
3103 static void si_gpu_init(struct radeon_device *rdev)
3104 {
3105 	u32 gb_addr_config = 0;
3106 	u32 mc_shared_chmap, mc_arb_ramcfg;
3107 	u32 sx_debug_1;
3108 	u32 hdp_host_path_cntl;
3109 	u32 tmp;
3110 	int i, j;
3111 
3112 	switch (rdev->family) {
3113 	case CHIP_TAHITI:
3114 		rdev->config.si.max_shader_engines = 2;
3115 		rdev->config.si.max_tile_pipes = 12;
3116 		rdev->config.si.max_cu_per_sh = 8;
3117 		rdev->config.si.max_sh_per_se = 2;
3118 		rdev->config.si.max_backends_per_se = 4;
3119 		rdev->config.si.max_texture_channel_caches = 12;
3120 		rdev->config.si.max_gprs = 256;
3121 		rdev->config.si.max_gs_threads = 32;
3122 		rdev->config.si.max_hw_contexts = 8;
3123 
3124 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3125 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3126 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3127 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3128 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3129 		break;
3130 	case CHIP_PITCAIRN:
3131 		rdev->config.si.max_shader_engines = 2;
3132 		rdev->config.si.max_tile_pipes = 8;
3133 		rdev->config.si.max_cu_per_sh = 5;
3134 		rdev->config.si.max_sh_per_se = 2;
3135 		rdev->config.si.max_backends_per_se = 4;
3136 		rdev->config.si.max_texture_channel_caches = 8;
3137 		rdev->config.si.max_gprs = 256;
3138 		rdev->config.si.max_gs_threads = 32;
3139 		rdev->config.si.max_hw_contexts = 8;
3140 
3141 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3142 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3143 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3144 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3145 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3146 		break;
3147 	case CHIP_VERDE:
3148 	default:
3149 		rdev->config.si.max_shader_engines = 1;
3150 		rdev->config.si.max_tile_pipes = 4;
3151 		rdev->config.si.max_cu_per_sh = 5;
3152 		rdev->config.si.max_sh_per_se = 2;
3153 		rdev->config.si.max_backends_per_se = 4;
3154 		rdev->config.si.max_texture_channel_caches = 4;
3155 		rdev->config.si.max_gprs = 256;
3156 		rdev->config.si.max_gs_threads = 32;
3157 		rdev->config.si.max_hw_contexts = 8;
3158 
3159 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3160 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3161 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3162 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3163 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3164 		break;
3165 	case CHIP_OLAND:
3166 		rdev->config.si.max_shader_engines = 1;
3167 		rdev->config.si.max_tile_pipes = 4;
3168 		rdev->config.si.max_cu_per_sh = 6;
3169 		rdev->config.si.max_sh_per_se = 1;
3170 		rdev->config.si.max_backends_per_se = 2;
3171 		rdev->config.si.max_texture_channel_caches = 4;
3172 		rdev->config.si.max_gprs = 256;
3173 		rdev->config.si.max_gs_threads = 16;
3174 		rdev->config.si.max_hw_contexts = 8;
3175 
3176 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3177 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3178 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3179 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3180 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3181 		break;
3182 	case CHIP_HAINAN:
3183 		rdev->config.si.max_shader_engines = 1;
3184 		rdev->config.si.max_tile_pipes = 4;
3185 		rdev->config.si.max_cu_per_sh = 5;
3186 		rdev->config.si.max_sh_per_se = 1;
3187 		rdev->config.si.max_backends_per_se = 1;
3188 		rdev->config.si.max_texture_channel_caches = 2;
3189 		rdev->config.si.max_gprs = 256;
3190 		rdev->config.si.max_gs_threads = 16;
3191 		rdev->config.si.max_hw_contexts = 8;
3192 
3193 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3194 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3195 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3196 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3197 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3198 		break;
3199 	}
3200 
3201 	/* Initialize HDP */
3202 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3203 		WREG32((0x2c14 + j), 0x00000000);
3204 		WREG32((0x2c18 + j), 0x00000000);
3205 		WREG32((0x2c1c + j), 0x00000000);
3206 		WREG32((0x2c20 + j), 0x00000000);
3207 		WREG32((0x2c24 + j), 0x00000000);
3208 	}
3209 
3210 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3211 	WREG32(SRBM_INT_CNTL, 1);
3212 	WREG32(SRBM_INT_ACK, 1);
3213 
3214 	evergreen_fix_pci_max_read_req_size(rdev);
3215 
3216 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3217 
3218 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3219 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3220 
3221 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3222 	rdev->config.si.mem_max_burst_length_bytes = 256;
3223 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3224 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3225 	if (rdev->config.si.mem_row_size_in_kb > 4)
3226 		rdev->config.si.mem_row_size_in_kb = 4;
3227 	/* XXX use MC settings? */
3228 	rdev->config.si.shader_engine_tile_size = 32;
3229 	rdev->config.si.num_gpus = 1;
3230 	rdev->config.si.multi_gpu_tile_size = 64;
3231 
3232 	/* fix up row size */
3233 	gb_addr_config &= ~ROW_SIZE_MASK;
3234 	switch (rdev->config.si.mem_row_size_in_kb) {
3235 	case 1:
3236 	default:
3237 		gb_addr_config |= ROW_SIZE(0);
3238 		break;
3239 	case 2:
3240 		gb_addr_config |= ROW_SIZE(1);
3241 		break;
3242 	case 4:
3243 		gb_addr_config |= ROW_SIZE(2);
3244 		break;
3245 	}
3246 
3247 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3248 	 * not have bank info, so create a custom tiling dword.
3249 	 * bits 3:0   num_pipes
3250 	 * bits 7:4   num_banks
3251 	 * bits 11:8  group_size
3252 	 * bits 15:12 row_size
3253 	 */
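	/*
	 * Worked example (hypothetical part): 8 pipes encodes as 3,
	 * sixteen banks as 2; with a pipe-interleave field of 0 and a
	 * 4KB row (field 2) the dword would come out as
	 * (2 << 12) | (0 << 8) | (2 << 4) | 3 = 0x2023.
	 */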
3254 	rdev->config.si.tile_config = 0;
3255 	switch (rdev->config.si.num_tile_pipes) {
3256 	case 1:
3257 		rdev->config.si.tile_config |= (0 << 0);
3258 		break;
3259 	case 2:
3260 		rdev->config.si.tile_config |= (1 << 0);
3261 		break;
3262 	case 4:
3263 		rdev->config.si.tile_config |= (2 << 0);
3264 		break;
3265 	case 8:
3266 	default:
3267 		/* XXX what about 12? */
3268 		rdev->config.si.tile_config |= (3 << 0);
3269 		break;
3270 	}
3271 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3272 	case 0: /* four banks */
3273 		rdev->config.si.tile_config |= 0 << 4;
3274 		break;
3275 	case 1: /* eight banks */
3276 		rdev->config.si.tile_config |= 1 << 4;
3277 		break;
3278 	case 2: /* sixteen banks */
3279 	default:
3280 		rdev->config.si.tile_config |= 2 << 4;
3281 		break;
3282 	}
3283 	rdev->config.si.tile_config |=
3284 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3285 	rdev->config.si.tile_config |=
3286 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3287 
3288 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3289 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3290 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3291 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3292 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3293 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3294 	if (rdev->has_uvd) {
3295 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3296 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3297 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3298 	}
3299 
3300 	si_tiling_mode_table_init(rdev);
3301 
3302 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3303 		    rdev->config.si.max_sh_per_se,
3304 		    rdev->config.si.max_backends_per_se);
3305 
3306 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3307 		     rdev->config.si.max_sh_per_se,
3308 		     rdev->config.si.max_cu_per_sh);
3309 
3310 	rdev->config.si.active_cus = 0;
3311 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3312 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3313 			rdev->config.si.active_cus +=
3314 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3315 		}
3316 	}
3317 
3318 	/* set HW defaults for 3D engine */
3319 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3320 				     ROQ_IB2_START(0x2b)));
3321 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3322 
3323 	sx_debug_1 = RREG32(SX_DEBUG_1);
3324 	WREG32(SX_DEBUG_1, sx_debug_1);
3325 
3326 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3327 
3328 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3329 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3330 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3331 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3332 
3333 	WREG32(VGT_NUM_INSTANCES, 1);
3334 
3335 	WREG32(CP_PERFMON_CNTL, 0);
3336 
3337 	WREG32(SQ_CONFIG, 0);
3338 
3339 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3340 					  FORCE_EOV_MAX_REZ_CNT(255)));
3341 
3342 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3343 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3344 
3345 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3346 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3347 
3348 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3349 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3350 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3351 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3352 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3353 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3354 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3355 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3356 
3357 	tmp = RREG32(HDP_MISC_CNTL);
3358 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3359 	WREG32(HDP_MISC_CNTL, tmp);
3360 
3361 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3362 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3363 
3364 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3365 
3366 	udelay(50);
3367 }
3368 
3369 /*
3370  * GPU scratch register helper functions.
3371  */
3372 static void si_scratch_init(struct radeon_device *rdev)
3373 {
3374 	int i;
3375 
3376 	rdev->scratch.num_reg = 7;
3377 	rdev->scratch.reg_base = SCRATCH_REG0;
3378 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3379 		rdev->scratch.free[i] = true;
3380 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3381 	}
3382 }
3383 
3384 void si_fence_ring_emit(struct radeon_device *rdev,
3385 			struct radeon_fence *fence)
3386 {
3387 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3388 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3389 
3390 	/* flush read cache over gart */
3391 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3392 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3393 	radeon_ring_write(ring, 0);
3394 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3395 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3396 			  PACKET3_TC_ACTION_ENA |
3397 			  PACKET3_SH_KCACHE_ACTION_ENA |
3398 			  PACKET3_SH_ICACHE_ACTION_ENA);
3399 	radeon_ring_write(ring, 0xFFFFFFFF);
3400 	radeon_ring_write(ring, 0);
3401 	radeon_ring_write(ring, 10); /* poll interval */
3402 	/* EVENT_WRITE_EOP - flush caches, send int */
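	/*
	 * In the EOP packet below, DATA_SEL(1) asks the CP to write the
	 * 32-bit fence sequence number to the fence address once the
	 * flush has completed, and INT_SEL(2) raises an interrupt after
	 * that write is confirmed (per the usual SI EVENT_WRITE_EOP
	 * field layout).
	 */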
3403 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3404 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3405 	radeon_ring_write(ring, lower_32_bits(addr));
3406 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3407 	radeon_ring_write(ring, fence->seq);
3408 	radeon_ring_write(ring, 0);
3409 }
3410 
3411 /*
3412  * IB stuff
3413  */
3414 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3415 {
3416 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3417 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3418 	u32 header;
3419 
3420 	if (ib->is_const_ib) {
3421 		/* set switch buffer packet before const IB */
3422 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3423 		radeon_ring_write(ring, 0);
3424 
3425 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3426 	} else {
3427 		u32 next_rptr;
3428 		if (ring->rptr_save_reg) {
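			/*
			 * Rough dword accounting of what this function
			 * emits from here on: 3 for this SET_CONFIG_REG
			 * write, 4 for the INDIRECT_BUFFER packet, 8 for
			 * the cache flush sequence after the IB.
			 */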
3429 			next_rptr = ring->wptr + 3 + 4 + 8;
3430 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3431 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3432 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3433 			radeon_ring_write(ring, next_rptr);
3434 		} else if (rdev->wb.enabled) {
3435 			next_rptr = ring->wptr + 5 + 4 + 8;
3436 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3437 			radeon_ring_write(ring, (1 << 8));
3438 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3439 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3440 			radeon_ring_write(ring, next_rptr);
3441 		}
3442 
3443 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3444 	}
3445 
3446 	radeon_ring_write(ring, header);
3447 	radeon_ring_write(ring,
3448 #ifdef __BIG_ENDIAN
3449 			  (2 << 0) |
3450 #endif
3451 			  (ib->gpu_addr & 0xFFFFFFFC));
3452 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3453 	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3454 
3455 	if (!ib->is_const_ib) {
3456 		/* flush read cache over gart for this vmid */
3457 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3458 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3459 		radeon_ring_write(ring, vm_id);
3460 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3461 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3462 				  PACKET3_TC_ACTION_ENA |
3463 				  PACKET3_SH_KCACHE_ACTION_ENA |
3464 				  PACKET3_SH_ICACHE_ACTION_ENA);
3465 		radeon_ring_write(ring, 0xFFFFFFFF);
3466 		radeon_ring_write(ring, 0);
3467 		radeon_ring_write(ring, 10); /* poll interval */
3468 	}
3469 }
3470 
3471 /*
3472  * CP.
3473  */
3474 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3475 {
3476 	if (enable)
3477 		WREG32(CP_ME_CNTL, 0);
3478 	else {
3479 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3480 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3481 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3482 		WREG32(SCRATCH_UMSK, 0);
3483 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3484 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3485 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3486 	}
3487 	udelay(50);
3488 }
3489 
3490 static int si_cp_load_microcode(struct radeon_device *rdev)
3491 {
3492 	int i;
3493 
3494 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3495 		return -EINVAL;
3496 
3497 	si_cp_enable(rdev, false);
3498 
3499 	if (rdev->new_fw) {
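		/*
		 * New-style firmware images carry a header describing the
		 * payload, which is a little-endian dword stream (hence
		 * le32_to_cpup() below); the legacy images handled in the
		 * else branch are raw big-endian streams.
		 */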
3500 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3501 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3502 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3503 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3504 		const struct gfx_firmware_header_v1_0 *me_hdr =
3505 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3506 		const __le32 *fw_data;
3507 		u32 fw_size;
3508 
3509 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3510 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3511 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3512 
3513 		/* PFP */
3514 		fw_data = (const __le32 *)
3515 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3516 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3517 		WREG32(CP_PFP_UCODE_ADDR, 0);
3518 		for (i = 0; i < fw_size; i++)
3519 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3520 		WREG32(CP_PFP_UCODE_ADDR, 0);
3521 
3522 		/* CE */
3523 		fw_data = (const __le32 *)
3524 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3525 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3526 		WREG32(CP_CE_UCODE_ADDR, 0);
3527 		for (i = 0; i < fw_size; i++)
3528 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3529 		WREG32(CP_CE_UCODE_ADDR, 0);
3530 
3531 		/* ME */
3532 		fw_data = (const __le32 *)
3533 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3534 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3535 		WREG32(CP_ME_RAM_WADDR, 0);
3536 		for (i = 0; i < fw_size; i++)
3537 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3538 		WREG32(CP_ME_RAM_WADDR, 0);
3539 	} else {
3540 		const __be32 *fw_data;
3541 
3542 		/* PFP */
3543 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3544 		WREG32(CP_PFP_UCODE_ADDR, 0);
3545 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3546 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3547 		WREG32(CP_PFP_UCODE_ADDR, 0);
3548 
3549 		/* CE */
3550 		fw_data = (const __be32 *)rdev->ce_fw->data;
3551 		WREG32(CP_CE_UCODE_ADDR, 0);
3552 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3553 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3554 		WREG32(CP_CE_UCODE_ADDR, 0);
3555 
3556 		/* ME */
3557 		fw_data = (const __be32 *)rdev->me_fw->data;
3558 		WREG32(CP_ME_RAM_WADDR, 0);
3559 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3560 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3561 		WREG32(CP_ME_RAM_WADDR, 0);
3562 	}
3563 
3564 	WREG32(CP_PFP_UCODE_ADDR, 0);
3565 	WREG32(CP_CE_UCODE_ADDR, 0);
3566 	WREG32(CP_ME_RAM_WADDR, 0);
3567 	WREG32(CP_ME_RAM_RADDR, 0);
3568 	return 0;
3569 }
3570 
3571 static int si_cp_start(struct radeon_device *rdev)
3572 {
3573 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3574 	int r, i;
3575 
3576 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3577 	if (r) {
3578 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3579 		return r;
3580 	}
3581 	/* init the CP */
3582 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3583 	radeon_ring_write(ring, 0x1);
3584 	radeon_ring_write(ring, 0x0);
3585 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3586 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3587 	radeon_ring_write(ring, 0);
3588 	radeon_ring_write(ring, 0);
3589 
3590 	/* init the CE partitions */
3591 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3592 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3593 	radeon_ring_write(ring, 0xc000);
3594 	radeon_ring_write(ring, 0xe000);
3595 	radeon_ring_unlock_commit(rdev, ring, false);
3596 
3597 	si_cp_enable(rdev, true);
3598 
3599 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3600 	if (r) {
3601 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3602 		return r;
3603 	}
3604 
3605 	/* setup clear context state */
3606 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3607 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3608 
3609 	for (i = 0; i < si_default_size; i++)
3610 		radeon_ring_write(ring, si_default_state[i]);
3611 
3612 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3613 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3614 
3615 	/* set clear context state */
3616 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3617 	radeon_ring_write(ring, 0);
3618 
3619 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3620 	radeon_ring_write(ring, 0x00000316);
3621 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3622 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3623 
3624 	radeon_ring_unlock_commit(rdev, ring, false);
3625 
3626 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3627 		ring = &rdev->ring[i];
3628 		r = radeon_ring_lock(rdev, ring, 2);
		if (r) {
			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
			return r;
		}
3629 
3630 		/* clear the compute context state */
3631 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3632 		radeon_ring_write(ring, 0);
3633 
3634 		radeon_ring_unlock_commit(rdev, ring, false);
3635 	}
3636 
3637 	return 0;
3638 }
3639 
3640 static void si_cp_fini(struct radeon_device *rdev)
3641 {
3642 	struct radeon_ring *ring;
3643 	si_cp_enable(rdev, false);
3644 
3645 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3646 	radeon_ring_fini(rdev, ring);
3647 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3648 
3649 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3650 	radeon_ring_fini(rdev, ring);
3651 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3652 
3653 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3654 	radeon_ring_fini(rdev, ring);
3655 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3656 }
3657 
3658 static int si_cp_resume(struct radeon_device *rdev)
3659 {
3660 	struct radeon_ring *ring;
3661 	u32 tmp;
3662 	u32 rb_bufsz;
3663 	int r;
3664 
3665 	si_enable_gui_idle_interrupt(rdev, false);
3666 
3667 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3668 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3669 
3670 	/* Set the write pointer delay */
3671 	WREG32(CP_RB_WPTR_DELAY, 0);
3672 
3673 	WREG32(CP_DEBUG, 0);
3674 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3675 
3676 	/* ring 0 - compute and gfx */
3677 	/* Set ring buffer size */
3678 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
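	/*
	 * ring_size is in bytes; the RB_BUFSZ field takes log2 of the
	 * size in 8-byte quadwords, and the field starting at bit 8
	 * (RB_BLKSZ) holds the log2 write-back granularity, one GPU
	 * page here.
	 */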
3679 	rb_bufsz = order_base_2(ring->ring_size / 8);
3680 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3681 #ifdef __BIG_ENDIAN
3682 	tmp |= BUF_SWAP_32BIT;
3683 #endif
3684 	WREG32(CP_RB0_CNTL, tmp);
3685 
3686 	/* Initialize the ring buffer's read and write pointers */
3687 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3688 	ring->wptr = 0;
3689 	WREG32(CP_RB0_WPTR, ring->wptr);
3690 
3691 	/* set the wb address whether it's enabled or not */
3692 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3693 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3694 
3695 	if (rdev->wb.enabled)
3696 		WREG32(SCRATCH_UMSK, 0xff);
3697 	else {
3698 		tmp |= RB_NO_UPDATE;
3699 		WREG32(SCRATCH_UMSK, 0);
3700 	}
3701 
3702 	mdelay(1);
3703 	WREG32(CP_RB0_CNTL, tmp);
3704 
3705 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3706 
3707 	/* ring1  - compute only */
3708 	/* Set ring buffer size */
3709 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3710 	rb_bufsz = order_base_2(ring->ring_size / 8);
3711 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3712 #ifdef __BIG_ENDIAN
3713 	tmp |= BUF_SWAP_32BIT;
3714 #endif
3715 	WREG32(CP_RB1_CNTL, tmp);
3716 
3717 	/* Initialize the ring buffer's read and write pointers */
3718 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3719 	ring->wptr = 0;
3720 	WREG32(CP_RB1_WPTR, ring->wptr);
3721 
3722 	/* set the wb address whether it's enabled or not */
3723 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3724 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3725 
3726 	mdelay(1);
3727 	WREG32(CP_RB1_CNTL, tmp);
3728 
3729 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3730 
3731 	/* ring2 - compute only */
3732 	/* Set ring buffer size */
3733 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3734 	rb_bufsz = order_base_2(ring->ring_size / 8);
3735 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3736 #ifdef __BIG_ENDIAN
3737 	tmp |= BUF_SWAP_32BIT;
3738 #endif
3739 	WREG32(CP_RB2_CNTL, tmp);
3740 
3741 	/* Initialize the ring buffer's read and write pointers */
3742 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3743 	ring->wptr = 0;
3744 	WREG32(CP_RB2_WPTR, ring->wptr);
3745 
3746 	/* set the wb address whether it's enabled or not */
3747 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3748 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3749 
3750 	mdelay(1);
3751 	WREG32(CP_RB2_CNTL, tmp);
3752 
3753 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3754 
3755 	/* start the rings */
3756 	si_cp_start(rdev);
3757 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3758 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3759 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3760 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3761 	if (r) {
3762 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3763 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3764 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3765 		return r;
3766 	}
3767 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3768 	if (r) {
3769 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3770 	}
3771 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3772 	if (r) {
3773 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3774 	}
3775 
3776 	si_enable_gui_idle_interrupt(rdev, true);
3777 
3778 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3779 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3780 
3781 	return 0;
3782 }
3783 
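/**
 * si_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return a mask of the blocks that
 * need to be soft reset (SI). A busy MC is deliberately dropped from
 * the mask at the end, since it is most likely just busy, not hung.
 */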
3784 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3785 {
3786 	u32 reset_mask = 0;
3787 	u32 tmp;
3788 
3789 	/* GRBM_STATUS */
3790 	tmp = RREG32(GRBM_STATUS);
3791 	if (tmp & (PA_BUSY | SC_BUSY |
3792 		   BCI_BUSY | SX_BUSY |
3793 		   TA_BUSY | VGT_BUSY |
3794 		   DB_BUSY | CB_BUSY |
3795 		   GDS_BUSY | SPI_BUSY |
3796 		   IA_BUSY | IA_BUSY_NO_DMA))
3797 		reset_mask |= RADEON_RESET_GFX;
3798 
3799 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3800 		   CP_BUSY | CP_COHERENCY_BUSY))
3801 		reset_mask |= RADEON_RESET_CP;
3802 
3803 	if (tmp & GRBM_EE_BUSY)
3804 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3805 
3806 	/* GRBM_STATUS2 */
3807 	tmp = RREG32(GRBM_STATUS2);
3808 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3809 		reset_mask |= RADEON_RESET_RLC;
3810 
3811 	/* DMA_STATUS_REG 0 */
3812 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3813 	if (!(tmp & DMA_IDLE))
3814 		reset_mask |= RADEON_RESET_DMA;
3815 
3816 	/* DMA_STATUS_REG 1 */
3817 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3818 	if (!(tmp & DMA_IDLE))
3819 		reset_mask |= RADEON_RESET_DMA1;
3820 
3821 	/* SRBM_STATUS2 */
3822 	tmp = RREG32(SRBM_STATUS2);
3823 	if (tmp & DMA_BUSY)
3824 		reset_mask |= RADEON_RESET_DMA;
3825 
3826 	if (tmp & DMA1_BUSY)
3827 		reset_mask |= RADEON_RESET_DMA1;
3828 
3829 	/* SRBM_STATUS */
3830 	tmp = RREG32(SRBM_STATUS);
3831 
3832 	if (tmp & IH_BUSY)
3833 		reset_mask |= RADEON_RESET_IH;
3834 
3835 	if (tmp & SEM_BUSY)
3836 		reset_mask |= RADEON_RESET_SEM;
3837 
3838 	if (tmp & GRBM_RQ_PENDING)
3839 		reset_mask |= RADEON_RESET_GRBM;
3840 
3841 	if (tmp & VMC_BUSY)
3842 		reset_mask |= RADEON_RESET_VMC;
3843 
3844 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3845 		   MCC_BUSY | MCD_BUSY))
3846 		reset_mask |= RADEON_RESET_MC;
3847 
3848 	if (evergreen_is_display_hung(rdev))
3849 		reset_mask |= RADEON_RESET_DISPLAY;
3850 
3851 	/* VM_L2_STATUS */
3852 	tmp = RREG32(VM_L2_STATUS);
3853 	if (tmp & L2_BUSY)
3854 		reset_mask |= RADEON_RESET_VMC;
3855 
3856 	/* Skip MC reset as it's most likely not hung, just busy */
3857 	if (reset_mask & RADEON_RESET_MC) {
3858 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3859 		reset_mask &= ~RADEON_RESET_MC;
3860 	}
3861 
3862 	return reset_mask;
3863 }
3864 
3865 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3866 {
3867 	struct evergreen_mc_save save;
3868 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3869 	u32 tmp;
3870 
3871 	if (reset_mask == 0)
3872 		return;
3873 
3874 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3875 
3876 	evergreen_print_gpu_status_regs(rdev);
3877 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3878 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3879 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3880 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3881 
3882 	/* disable PG/CG */
3883 	si_fini_pg(rdev);
3884 	si_fini_cg(rdev);
3885 
3886 	/* stop the rlc */
3887 	si_rlc_stop(rdev);
3888 
3889 	/* Disable CP parsing/prefetching */
3890 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3891 
3892 	if (reset_mask & RADEON_RESET_DMA) {
3893 		/* dma0 */
3894 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3895 		tmp &= ~DMA_RB_ENABLE;
3896 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3897 	}
3898 	if (reset_mask & RADEON_RESET_DMA1) {
3899 		/* dma1 */
3900 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3901 		tmp &= ~DMA_RB_ENABLE;
3902 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3903 	}
3904 
3905 	udelay(50);
3906 
3907 	evergreen_mc_stop(rdev, &save);
3908 	if (evergreen_mc_wait_for_idle(rdev)) {
3909 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3910 	}
3911 
3912 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3913 		grbm_soft_reset = SOFT_RESET_CB |
3914 			SOFT_RESET_DB |
3915 			SOFT_RESET_GDS |
3916 			SOFT_RESET_PA |
3917 			SOFT_RESET_SC |
3918 			SOFT_RESET_BCI |
3919 			SOFT_RESET_SPI |
3920 			SOFT_RESET_SX |
3921 			SOFT_RESET_TC |
3922 			SOFT_RESET_TA |
3923 			SOFT_RESET_VGT |
3924 			SOFT_RESET_IA;
3925 	}
3926 
3927 	if (reset_mask & RADEON_RESET_CP) {
3928 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3929 
3930 		srbm_soft_reset |= SOFT_RESET_GRBM;
3931 	}
3932 
3933 	if (reset_mask & RADEON_RESET_DMA)
3934 		srbm_soft_reset |= SOFT_RESET_DMA;
3935 
3936 	if (reset_mask & RADEON_RESET_DMA1)
3937 		srbm_soft_reset |= SOFT_RESET_DMA1;
3938 
3939 	if (reset_mask & RADEON_RESET_DISPLAY)
3940 		srbm_soft_reset |= SOFT_RESET_DC;
3941 
3942 	if (reset_mask & RADEON_RESET_RLC)
3943 		grbm_soft_reset |= SOFT_RESET_RLC;
3944 
3945 	if (reset_mask & RADEON_RESET_SEM)
3946 		srbm_soft_reset |= SOFT_RESET_SEM;
3947 
3948 	if (reset_mask & RADEON_RESET_IH)
3949 		srbm_soft_reset |= SOFT_RESET_IH;
3950 
3951 	if (reset_mask & RADEON_RESET_GRBM)
3952 		srbm_soft_reset |= SOFT_RESET_GRBM;
3953 
3954 	if (reset_mask & RADEON_RESET_VMC)
3955 		srbm_soft_reset |= SOFT_RESET_VMC;
3956 
3957 	if (reset_mask & RADEON_RESET_MC)
3958 		srbm_soft_reset |= SOFT_RESET_MC;
3959 
3960 	if (grbm_soft_reset) {
3961 		tmp = RREG32(GRBM_SOFT_RESET);
3962 		tmp |= grbm_soft_reset;
3963 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3964 		WREG32(GRBM_SOFT_RESET, tmp);
3965 		tmp = RREG32(GRBM_SOFT_RESET);
3966 
3967 		udelay(50);
3968 
3969 		tmp &= ~grbm_soft_reset;
3970 		WREG32(GRBM_SOFT_RESET, tmp);
3971 		tmp = RREG32(GRBM_SOFT_RESET);
3972 	}
3973 
3974 	if (srbm_soft_reset) {
3975 		tmp = RREG32(SRBM_SOFT_RESET);
3976 		tmp |= srbm_soft_reset;
3977 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3978 		WREG32(SRBM_SOFT_RESET, tmp);
3979 		tmp = RREG32(SRBM_SOFT_RESET);
3980 
3981 		udelay(50);
3982 
3983 		tmp &= ~srbm_soft_reset;
3984 		WREG32(SRBM_SOFT_RESET, tmp);
3985 		tmp = RREG32(SRBM_SOFT_RESET);
3986 	}
3987 
3988 	/* Wait a little for things to settle down */
3989 	udelay(50);
3990 
3991 	evergreen_mc_resume(rdev, &save);
3992 	udelay(50);
3993 
3994 	evergreen_print_gpu_status_regs(rdev);
3995 }
3996 
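/*
 * Force SCLK and MCLK onto their bypass sources so the PLLs can be
 * safely reset and powered down before a PCI config reset.
 */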
3997 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3998 {
3999 	u32 tmp, i;
4000 
4001 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4002 	tmp |= SPLL_BYPASS_EN;
4003 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4004 
4005 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4006 	tmp |= SPLL_CTLREQ_CHG;
4007 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4008 
4009 	for (i = 0; i < rdev->usec_timeout; i++) {
4010 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
4011 			break;
4012 		udelay(1);
4013 	}
4014 
4015 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4016 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
4017 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4018 
4019 	tmp = RREG32(MPLL_CNTL_MODE);
4020 	tmp &= ~MPLL_MCLK_SEL;
4021 	WREG32(MPLL_CNTL_MODE, tmp);
4022 }
4023 
4024 static void si_spll_powerdown(struct radeon_device *rdev)
4025 {
4026 	u32 tmp;
4027 
4028 	tmp = RREG32(SPLL_CNTL_MODE);
4029 	tmp |= SPLL_SW_DIR_CONTROL;
4030 	WREG32(SPLL_CNTL_MODE, tmp);
4031 
4032 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4033 	tmp |= SPLL_RESET;
4034 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4035 
4036 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4037 	tmp |= SPLL_SLEEP;
4038 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4039 
4040 	tmp = RREG32(SPLL_CNTL_MODE);
4041 	tmp &= ~SPLL_SW_DIR_CONTROL;
4042 	WREG32(SPLL_CNTL_MODE, tmp);
4043 }
4044 
4045 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4046 {
4047 	struct evergreen_mc_save save;
4048 	u32 tmp, i;
4049 
4050 	dev_info(rdev->dev, "GPU pci config reset\n");
4051 
4052 	/* disable dpm? */
4053 
4054 	/* disable cg/pg */
4055 	si_fini_pg(rdev);
4056 	si_fini_cg(rdev);
4057 
4058 	/* Disable CP parsing/prefetching */
4059 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4060 	/* dma0 */
4061 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4062 	tmp &= ~DMA_RB_ENABLE;
4063 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4064 	/* dma1 */
4065 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4066 	tmp &= ~DMA_RB_ENABLE;
4067 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4068 	/* XXX other engines? */
4069 
4070 	/* halt the rlc, disable cp internal ints */
4071 	si_rlc_stop(rdev);
4072 
4073 	udelay(50);
4074 
4075 	/* disable mem access */
4076 	evergreen_mc_stop(rdev, &save);
4077 	if (evergreen_mc_wait_for_idle(rdev)) {
4078 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4079 	}
4080 
4081 	/* set mclk/sclk to bypass */
4082 	si_set_clk_bypass_mode(rdev);
4083 	/* powerdown spll */
4084 	si_spll_powerdown(rdev);
4085 	/* disable BM */
4086 	pci_clear_master(rdev->pdev);
4087 	/* reset */
4088 	radeon_pci_config_reset(rdev);
4089 	/* wait for asic to come out of reset */
4090 	for (i = 0; i < rdev->usec_timeout; i++) {
4091 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4092 			break;
4093 		udelay(1);
4094 	}
4095 }
4096 
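/**
 * si_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @hard: force hard reset
 *
 * Look up which blocks are hung and attempt to reset them; fall back
 * to a PCI config reset if the soft reset fails and hard resets are
 * allowed.
 * Returns 0 for success.
 */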
4097 int si_asic_reset(struct radeon_device *rdev, bool hard)
4098 {
4099 	u32 reset_mask;
4100 
4101 	if (hard) {
4102 		si_gpu_pci_config_reset(rdev);
4103 		return 0;
4104 	}
4105 
4106 	reset_mask = si_gpu_check_soft_reset(rdev);
4107 
4108 	if (reset_mask)
4109 		r600_set_bios_scratch_engine_hung(rdev, true);
4110 
4111 	/* try soft reset */
4112 	si_gpu_soft_reset(rdev, reset_mask);
4113 
4114 	reset_mask = si_gpu_check_soft_reset(rdev);
4115 
4116 	/* try pci config reset */
4117 	if (reset_mask && radeon_hard_reset)
4118 		si_gpu_pci_config_reset(rdev);
4119 
4120 	reset_mask = si_gpu_check_soft_reset(rdev);
4121 
4122 	if (!reset_mask)
4123 		r600_set_bios_scratch_engine_hung(rdev, false);
4124 
4125 	return 0;
4126 }
4127 
4128 /**
4129  * si_gfx_is_lockup - Check if the GFX engine is locked up
4130  *
4131  * @rdev: radeon_device pointer
4132  * @ring: radeon_ring structure holding ring information
4133  *
4134  * Check if the GFX engine is locked up.
4135  * Returns true if the engine appears to be locked up, false if not.
4136  */
4137 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4138 {
4139 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4140 
4141 	if (!(reset_mask & (RADEON_RESET_GFX |
4142 			    RADEON_RESET_COMPUTE |
4143 			    RADEON_RESET_CP))) {
4144 		radeon_ring_lockup_update(rdev, ring);
4145 		return false;
4146 	}
4147 	return radeon_ring_test_lockup(rdev, ring);
4148 }
4149 
4150 /* MC */
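/**
 * si_mc_program - program the memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Stop the memory controller, program the FB aperture and system
 * aperture locations, then bring the MC back up (SI). AGP is left
 * disabled.
 */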
4151 static void si_mc_program(struct radeon_device *rdev)
4152 {
4153 	struct evergreen_mc_save save;
4154 	u32 tmp;
4155 	int i, j;
4156 
4157 	/* Initialize HDP */
4158 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4159 		WREG32((0x2c14 + j), 0x00000000);
4160 		WREG32((0x2c18 + j), 0x00000000);
4161 		WREG32((0x2c1c + j), 0x00000000);
4162 		WREG32((0x2c20 + j), 0x00000000);
4163 		WREG32((0x2c24 + j), 0x00000000);
4164 	}
4165 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4166 
4167 	evergreen_mc_stop(rdev, &save);
4168 	if (radeon_mc_wait_for_idle(rdev)) {
4169 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4170 	}
4171 	if (!ASIC_IS_NODCE(rdev))
4172 		/* Lock out access through the VGA aperture */
4173 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4174 	/* Update configuration */
4175 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4176 	       rdev->mc.vram_start >> 12);
4177 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4178 	       rdev->mc.vram_end >> 12);
4179 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4180 	       rdev->vram_scratch.gpu_addr >> 12);
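	/*
	 * MC_VM_FB_LOCATION packs the FB top in bits 31:16 and the FB
	 * base in bits 15:0, both in 16MB (1 << 24) units.
	 */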
4181 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4182 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4183 	WREG32(MC_VM_FB_LOCATION, tmp);
4184 	/* XXX double check these! */
4185 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4186 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4187 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4188 	WREG32(MC_VM_AGP_BASE, 0);
4189 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4190 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4191 	if (radeon_mc_wait_for_idle(rdev)) {
4192 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4193 	}
4194 	evergreen_mc_resume(rdev, &save);
4195 	if (!ASIC_IS_NODCE(rdev)) {
4196 		/* we need to own VRAM, so turn off the VGA renderer here
4197 		 * to stop it overwriting our objects */
4198 		rv515_vga_render_disable(rdev);
4199 	}
4200 }
4201 
4202 void si_vram_gtt_location(struct radeon_device *rdev,
4203 			  struct radeon_mc *mc)
4204 {
4205 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4206 		/* leave room for at least 1024M GTT */
4207 		dev_warn(rdev->dev, "limiting VRAM\n");
4208 		mc->real_vram_size = 0xFFC0000000ULL;
4209 		mc->mc_vram_size = 0xFFC0000000ULL;
4210 	}
4211 	radeon_vram_location(rdev, &rdev->mc, 0);
4212 	rdev->mc.gtt_base_align = 0;
4213 	radeon_gtt_location(rdev, mc);
4214 }
4215 
4216 static int si_mc_init(struct radeon_device *rdev)
4217 {
4218 	u32 tmp;
4219 	int chansize, numchan;
4220 
4221 	/* Get VRAM information */
4222 	rdev->mc.vram_is_ddr = true;
4223 	tmp = RREG32(MC_ARB_RAMCFG);
4224 	if (tmp & CHANSIZE_OVERRIDE) {
4225 		chansize = 16;
4226 	} else if (tmp & CHANSIZE_MASK) {
4227 		chansize = 64;
4228 	} else {
4229 		chansize = 32;
4230 	}
4231 	tmp = RREG32(MC_SHARED_CHMAP);
4232 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4233 	case 0:
4234 	default:
4235 		numchan = 1;
4236 		break;
4237 	case 1:
4238 		numchan = 2;
4239 		break;
4240 	case 2:
4241 		numchan = 4;
4242 		break;
4243 	case 3:
4244 		numchan = 8;
4245 		break;
4246 	case 4:
4247 		numchan = 3;
4248 		break;
4249 	case 5:
4250 		numchan = 6;
4251 		break;
4252 	case 6:
4253 		numchan = 10;
4254 		break;
4255 	case 7:
4256 		numchan = 12;
4257 		break;
4258 	case 8:
4259 		numchan = 16;
4260 		break;
4261 	}
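	/* e.g. a hypothetical 256-bit board would report 8 channels of 32 bits */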
4262 	rdev->mc.vram_width = numchan * chansize;
4263 	/* Could the aperture size report 0? */
4264 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4265 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4266 	/* size in MB on si */
4267 	tmp = RREG32(CONFIG_MEMSIZE);
4268 	/* some boards may have garbage in the upper 16 bits */
4269 	if (tmp & 0xffff0000) {
4270 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4271 		if (tmp & 0xffff)
4272 			tmp &= 0xffff;
4273 	}
4274 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4275 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4276 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4277 	si_vram_gtt_location(rdev, &rdev->mc);
4278 	radeon_update_bandwidth_info(rdev);
4279 
4280 	return 0;
4281 }
4282 
4283 /*
4284  * GART
4285  */
4286 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4287 {
4288 	/* flush hdp cache */
4289 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4290 
4291 	/* bits 0-15 are the VM contexts0-15 */
4292 	WREG32(VM_INVALIDATE_REQUEST, 1);
4293 }
4294 
4295 static int si_pcie_gart_enable(struct radeon_device *rdev)
4296 {
4297 	int r, i;
4298 
4299 	if (rdev->gart.robj == NULL) {
4300 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4301 		return -EINVAL;
4302 	}
4303 	r = radeon_gart_table_vram_pin(rdev);
4304 	if (r)
4305 		return r;
4306 	/* Setup TLB control */
4307 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4308 	       (0xA << 7) |
4309 	       ENABLE_L1_TLB |
4310 	       ENABLE_L1_FRAGMENT_PROCESSING |
4311 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4312 	       ENABLE_ADVANCED_DRIVER_MODEL |
4313 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4314 	/* Setup L2 cache */
4315 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4316 	       ENABLE_L2_FRAGMENT_PROCESSING |
4317 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4318 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4319 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4320 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4321 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4322 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4323 	       BANK_SELECT(4) |
4324 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4325 	/* setup context0 */
4326 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4327 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4328 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4329 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4330 			(u32)(rdev->dummy_page.addr >> 12));
4331 	WREG32(VM_CONTEXT0_CNTL2, 0);
4332 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4333 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4334 
4335 	WREG32(0x15D4, 0);
4336 	WREG32(0x15D8, 0);
4337 	WREG32(0x15DC, 0);
4338 
4339 	/* empty context1-15 */
4340 	/* set vm size, must be a multiple of 4 */
4341 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4342 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4343 	/* Assign the pt base to something valid for now; the pts used for
4344 	 * the VMs are determined by the application and setup and assigned
4345 	 * on the fly in the vm part of radeon_gart.c
4346 	 */
4347 	for (i = 1; i < 16; i++) {
4348 		if (i < 8)
4349 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4350 			       rdev->vm_manager.saved_table_addr[i]);
4351 		else
4352 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4353 			       rdev->vm_manager.saved_table_addr[i]);
4354 	}
4355 
4356 	/* enable context1-15 */
4357 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4358 	       (u32)(rdev->dummy_page.addr >> 12));
4359 	WREG32(VM_CONTEXT1_CNTL2, 4);
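	/*
	 * radeon_vm_block_size is the log2 of the pages covered by one
	 * page-table block; the hardware field appears to be biased by
	 * 9 (a 512-entry minimum block), hence the subtraction below.
	 */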
4360 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4361 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4362 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4363 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4364 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4365 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4366 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4367 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4368 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4369 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4370 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4371 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4372 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4373 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4374 
4375 	si_pcie_gart_tlb_flush(rdev);
4376 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4377 		 (unsigned)(rdev->mc.gtt_size >> 20),
4378 		 (unsigned long long)rdev->gart.table_addr);
4379 	rdev->gart.ready = true;
4380 	return 0;
4381 }
4382 
4383 static void si_pcie_gart_disable(struct radeon_device *rdev)
4384 {
4385 	unsigned i;
4386 
4387 	for (i = 1; i < 16; ++i) {
4388 		uint32_t reg;
4389 		if (i < 8)
4390 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4391 		else
4392 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4393 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4394 	}
4395 
4396 	/* Disable all tables */
4397 	WREG32(VM_CONTEXT0_CNTL, 0);
4398 	WREG32(VM_CONTEXT1_CNTL, 0);
4399 	/* Setup TLB control */
4400 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4401 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4402 	/* Setup L2 cache */
4403 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4404 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4405 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4406 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4407 	WREG32(VM_L2_CNTL2, 0);
4408 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4409 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4410 	radeon_gart_table_vram_unpin(rdev);
4411 }
4412 
4413 static void si_pcie_gart_fini(struct radeon_device *rdev)
4414 {
4415 	si_pcie_gart_disable(rdev);
4416 	radeon_gart_table_vram_free(rdev);
4417 	radeon_gart_fini(rdev);
4418 }
4419 
4420 /* vm parser */
4421 static bool si_vm_reg_valid(u32 reg)
4422 {
4423 	/* context regs are fine */
4424 	if (reg >= 0x28000)
4425 		return true;
4426 
4427 	/* shader regs are also fine */
4428 	if (reg >= 0xB000 && reg < 0xC000)
4429 		return true;
4430 
4431 	/* check config regs */
4432 	switch (reg) {
4433 	case GRBM_GFX_INDEX:
4434 	case CP_STRMOUT_CNTL:
4435 	case VGT_VTX_VECT_EJECT_REG:
4436 	case VGT_CACHE_INVALIDATION:
4437 	case VGT_ESGS_RING_SIZE:
4438 	case VGT_GSVS_RING_SIZE:
4439 	case VGT_GS_VERTEX_REUSE:
4440 	case VGT_PRIMITIVE_TYPE:
4441 	case VGT_INDEX_TYPE:
4442 	case VGT_NUM_INDICES:
4443 	case VGT_NUM_INSTANCES:
4444 	case VGT_TF_RING_SIZE:
4445 	case VGT_HS_OFFCHIP_PARAM:
4446 	case VGT_TF_MEMORY_BASE:
4447 	case PA_CL_ENHANCE:
4448 	case PA_SU_LINE_STIPPLE_VALUE:
4449 	case PA_SC_LINE_STIPPLE_STATE:
4450 	case PA_SC_ENHANCE:
4451 	case SQC_CACHES:
4452 	case SPI_STATIC_THREAD_MGMT_1:
4453 	case SPI_STATIC_THREAD_MGMT_2:
4454 	case SPI_STATIC_THREAD_MGMT_3:
4455 	case SPI_PS_MAX_WAVE_ID:
4456 	case SPI_CONFIG_CNTL:
4457 	case SPI_CONFIG_CNTL_1:
4458 	case TA_CNTL_AUX:
4459 	case TA_CS_BC_BASE_ADDR:
4460 		return true;
4461 	default:
4462 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4463 		return false;
4464 	}
4465 }
4466 
4467 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4468 				  u32 *ib, struct radeon_cs_packet *pkt)
4469 {
4470 	switch (pkt->opcode) {
4471 	case PACKET3_NOP:
4472 	case PACKET3_SET_BASE:
4473 	case PACKET3_SET_CE_DE_COUNTERS:
4474 	case PACKET3_LOAD_CONST_RAM:
4475 	case PACKET3_WRITE_CONST_RAM:
4476 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4477 	case PACKET3_DUMP_CONST_RAM:
4478 	case PACKET3_INCREMENT_CE_COUNTER:
4479 	case PACKET3_WAIT_ON_DE_COUNTER:
4480 	case PACKET3_CE_WRITE:
4481 		break;
4482 	default:
4483 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4484 		return -EINVAL;
4485 	}
4486 	return 0;
4487 }
4488 
4489 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4490 {
4491 	u32 start_reg, reg, i;
4492 	u32 command = ib[idx + 4];
4493 	u32 info = ib[idx + 1];
4494 	u32 idx_value = ib[idx];
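	/*
	 * In the CP_DMA info dword, bits 30:29 select the source address
	 * space and bits 21:20 the destination space; 0 means "register",
	 * which is the only case that needs validating here.
	 */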
4495 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4496 		/* src address space is register */
4497 		if (((info & 0x60000000) >> 29) == 0) {
4498 			start_reg = idx_value << 2;
4499 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4500 				reg = start_reg;
4501 				if (!si_vm_reg_valid(reg)) {
4502 					DRM_ERROR("CP DMA Bad SRC register\n");
4503 					return -EINVAL;
4504 				}
4505 			} else {
4506 				for (i = 0; i < (command & 0x1fffff); i++) {
4507 					reg = start_reg + (4 * i);
4508 					if (!si_vm_reg_valid(reg)) {
4509 						DRM_ERROR("CP DMA Bad SRC register\n");
4510 						return -EINVAL;
4511 					}
4512 				}
4513 			}
4514 		}
4515 	}
4516 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4517 		/* dst address space is register */
4518 		if (((info & 0x00300000) >> 20) == 0) {
4519 			start_reg = ib[idx + 2];
4520 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4521 				reg = start_reg;
4522 				if (!si_vm_reg_valid(reg)) {
4523 					DRM_ERROR("CP DMA Bad DST register\n");
4524 					return -EINVAL;
4525 				}
4526 			} else {
4527 				for (i = 0; i < (command & 0x1fffff); i++) {
4528 					reg = start_reg + (4 * i);
4529 					if (!si_vm_reg_valid(reg)) {
4530 						DRM_ERROR("CP DMA Bad DST register\n");
4531 						return -EINVAL;
4532 					}
4533 				}
4534 			}
4535 		}
4536 	}
4537 	return 0;
4538 }
4539 
4540 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4541 				   u32 *ib, struct radeon_cs_packet *pkt)
4542 {
4543 	int r;
4544 	u32 idx = pkt->idx + 1;
4545 	u32 idx_value = ib[idx];
4546 	u32 start_reg, end_reg, reg, i;
4547 
4548 	switch (pkt->opcode) {
4549 	case PACKET3_NOP:
4550 	case PACKET3_SET_BASE:
4551 	case PACKET3_CLEAR_STATE:
4552 	case PACKET3_INDEX_BUFFER_SIZE:
4553 	case PACKET3_DISPATCH_DIRECT:
4554 	case PACKET3_DISPATCH_INDIRECT:
4555 	case PACKET3_ALLOC_GDS:
4556 	case PACKET3_WRITE_GDS_RAM:
4557 	case PACKET3_ATOMIC_GDS:
4558 	case PACKET3_ATOMIC:
4559 	case PACKET3_OCCLUSION_QUERY:
4560 	case PACKET3_SET_PREDICATION:
4561 	case PACKET3_COND_EXEC:
4562 	case PACKET3_PRED_EXEC:
4563 	case PACKET3_DRAW_INDIRECT:
4564 	case PACKET3_DRAW_INDEX_INDIRECT:
4565 	case PACKET3_INDEX_BASE:
4566 	case PACKET3_DRAW_INDEX_2:
4567 	case PACKET3_CONTEXT_CONTROL:
4568 	case PACKET3_INDEX_TYPE:
4569 	case PACKET3_DRAW_INDIRECT_MULTI:
4570 	case PACKET3_DRAW_INDEX_AUTO:
4571 	case PACKET3_DRAW_INDEX_IMMD:
4572 	case PACKET3_NUM_INSTANCES:
4573 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4574 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4575 	case PACKET3_DRAW_INDEX_OFFSET_2:
4576 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4577 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4578 	case PACKET3_MPEG_INDEX:
4579 	case PACKET3_WAIT_REG_MEM:
4580 	case PACKET3_MEM_WRITE:
4581 	case PACKET3_PFP_SYNC_ME:
4582 	case PACKET3_SURFACE_SYNC:
4583 	case PACKET3_EVENT_WRITE:
4584 	case PACKET3_EVENT_WRITE_EOP:
4585 	case PACKET3_EVENT_WRITE_EOS:
4586 	case PACKET3_SET_CONTEXT_REG:
4587 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4588 	case PACKET3_SET_SH_REG:
4589 	case PACKET3_SET_SH_REG_OFFSET:
4590 	case PACKET3_INCREMENT_DE_COUNTER:
4591 	case PACKET3_WAIT_ON_CE_COUNTER:
4592 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4593 	case PACKET3_ME_WRITE:
4594 		break;
4595 	case PACKET3_COPY_DATA:
4596 		if ((idx_value & 0xf00) == 0) {
4597 			reg = ib[idx + 3] * 4;
4598 			if (!si_vm_reg_valid(reg))
4599 				return -EINVAL;
4600 		}
4601 		break;
4602 	case PACKET3_WRITE_DATA:
4603 		if ((idx_value & 0xf00) == 0) {
4604 			start_reg = ib[idx + 1] * 4;
4605 			if (idx_value & 0x10000) {
4606 				if (!si_vm_reg_valid(start_reg))
4607 					return -EINVAL;
4608 			} else {
4609 				for (i = 0; i < (pkt->count - 2); i++) {
4610 					reg = start_reg + (4 * i);
4611 					if (!si_vm_reg_valid(reg))
4612 						return -EINVAL;
4613 				}
4614 			}
4615 		}
4616 		break;
4617 	case PACKET3_COND_WRITE:
4618 		if (idx_value & 0x100) {
4619 			reg = ib[idx + 5] * 4;
4620 			if (!si_vm_reg_valid(reg))
4621 				return -EINVAL;
4622 		}
4623 		break;
4624 	case PACKET3_COPY_DW:
4625 		if (idx_value & 0x2) {
4626 			reg = ib[idx + 3] * 4;
4627 			if (!si_vm_reg_valid(reg))
4628 				return -EINVAL;
4629 		}
4630 		break;
4631 	case PACKET3_SET_CONFIG_REG:
4632 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4633 		end_reg = 4 * pkt->count + start_reg - 4;
4634 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4635 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4636 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4637 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4638 			return -EINVAL;
4639 		}
4640 		for (i = 0; i < pkt->count; i++) {
4641 			reg = start_reg + (4 * i);
4642 			if (!si_vm_reg_valid(reg))
4643 				return -EINVAL;
4644 		}
4645 		break;
4646 	case PACKET3_CP_DMA:
4647 		r = si_vm_packet3_cp_dma_check(ib, idx);
4648 		if (r)
4649 			return r;
4650 		break;
4651 	default:
4652 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4653 		return -EINVAL;
4654 	}
4655 	return 0;
4656 }
4657 
4658 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4659 				       u32 *ib, struct radeon_cs_packet *pkt)
4660 {
4661 	int r;
4662 	u32 idx = pkt->idx + 1;
4663 	u32 idx_value = ib[idx];
4664 	u32 start_reg, reg, i;
4665 
4666 	switch (pkt->opcode) {
4667 	case PACKET3_NOP:
4668 	case PACKET3_SET_BASE:
4669 	case PACKET3_CLEAR_STATE:
4670 	case PACKET3_DISPATCH_DIRECT:
4671 	case PACKET3_DISPATCH_INDIRECT:
4672 	case PACKET3_ALLOC_GDS:
4673 	case PACKET3_WRITE_GDS_RAM:
4674 	case PACKET3_ATOMIC_GDS:
4675 	case PACKET3_ATOMIC:
4676 	case PACKET3_OCCLUSION_QUERY:
4677 	case PACKET3_SET_PREDICATION:
4678 	case PACKET3_COND_EXEC:
4679 	case PACKET3_PRED_EXEC:
4680 	case PACKET3_CONTEXT_CONTROL:
4681 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4682 	case PACKET3_WAIT_REG_MEM:
4683 	case PACKET3_MEM_WRITE:
4684 	case PACKET3_PFP_SYNC_ME:
4685 	case PACKET3_SURFACE_SYNC:
4686 	case PACKET3_EVENT_WRITE:
4687 	case PACKET3_EVENT_WRITE_EOP:
4688 	case PACKET3_EVENT_WRITE_EOS:
4689 	case PACKET3_SET_CONTEXT_REG:
4690 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4691 	case PACKET3_SET_SH_REG:
4692 	case PACKET3_SET_SH_REG_OFFSET:
4693 	case PACKET3_INCREMENT_DE_COUNTER:
4694 	case PACKET3_WAIT_ON_CE_COUNTER:
4695 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4696 	case PACKET3_ME_WRITE:
4697 		break;
4698 	case PACKET3_COPY_DATA:
4699 		if ((idx_value & 0xf00) == 0) {
4700 			reg = ib[idx + 3] * 4;
4701 			if (!si_vm_reg_valid(reg))
4702 				return -EINVAL;
4703 		}
4704 		break;
4705 	case PACKET3_WRITE_DATA:
4706 		if ((idx_value & 0xf00) == 0) {
4707 			start_reg = ib[idx + 1] * 4;
4708 			if (idx_value & 0x10000) {
4709 				if (!si_vm_reg_valid(start_reg))
4710 					return -EINVAL;
4711 			} else {
4712 				for (i = 0; i < (pkt->count - 2); i++) {
4713 					reg = start_reg + (4 * i);
4714 					if (!si_vm_reg_valid(reg))
4715 						return -EINVAL;
4716 				}
4717 			}
4718 		}
4719 		break;
4720 	case PACKET3_COND_WRITE:
4721 		if (idx_value & 0x100) {
4722 			reg = ib[idx + 5] * 4;
4723 			if (!si_vm_reg_valid(reg))
4724 				return -EINVAL;
4725 		}
4726 		break;
4727 	case PACKET3_COPY_DW:
4728 		if (idx_value & 0x2) {
4729 			reg = ib[idx + 3] * 4;
4730 			if (!si_vm_reg_valid(reg))
4731 				return -EINVAL;
4732 		}
4733 		break;
4734 	case PACKET3_CP_DMA:
4735 		r = si_vm_packet3_cp_dma_check(ib, idx);
4736 		if (r)
4737 			return r;
4738 		break;
4739 	default:
4740 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4741 		return -EINVAL;
4742 	}
4743 	return 0;
4744 }
4745 
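/**
 * si_ib_parse - validate a VM IB on the CP rings
 *
 * @rdev: radeon_device pointer
 * @ib: the indirect buffer to validate
 *
 * Walk the IB packet by packet, rejecting type-0 packets outright and
 * checking type-3 packets against the per-ring checkers above (SI).
 * Returns 0 on success, -EINVAL on a disallowed packet or register.
 */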
4746 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4747 {
4748 	int ret = 0;
4749 	u32 idx = 0, i;
4750 	struct radeon_cs_packet pkt;
4751 
4752 	do {
4753 		pkt.idx = idx;
4754 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4755 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4756 		pkt.one_reg_wr = 0;
4757 		switch (pkt.type) {
4758 		case RADEON_PACKET_TYPE0:
4759 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4760 			ret = -EINVAL;
4761 			break;
4762 		case RADEON_PACKET_TYPE2:
4763 			idx += 1;
4764 			break;
4765 		case RADEON_PACKET_TYPE3:
4766 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4767 			if (ib->is_const_ib)
4768 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4769 			else {
4770 				switch (ib->ring) {
4771 				case RADEON_RING_TYPE_GFX_INDEX:
4772 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4773 					break;
4774 				case CAYMAN_RING_TYPE_CP1_INDEX:
4775 				case CAYMAN_RING_TYPE_CP2_INDEX:
4776 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4777 					break;
4778 				default:
4779 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4780 					ret = -EINVAL;
4781 					break;
4782 				}
4783 			}
4784 			idx += pkt.count + 2;
4785 			break;
4786 		default:
4787 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4788 			ret = -EINVAL;
4789 			break;
4790 		}
4791 		if (ret) {
4792 			for (i = 0; i < ib->length_dw; i++) {
4793 				if (i == idx)
4794 					printk("\t0x%08x <---\n", ib->ptr[i]);
4795 				else
4796 					printk("\t0x%08x\n", ib->ptr[i]);
4797 			}
4798 			break;
4799 		}
4800 	} while (idx < ib->length_dw);
4801 
4802 	return ret;
4803 }
4804 
4805 /*
4806  * vm
4807  */
4808 int si_vm_init(struct radeon_device *rdev)
4809 {
4810 	/* number of VMs */
4811 	rdev->vm_manager.nvm = 16;
4812 	/* base offset of vram pages */
4813 	rdev->vm_manager.vram_base_offset = 0;
4814 
4815 	return 0;
4816 }
4817 
4818 void si_vm_fini(struct radeon_device *rdev)
4819 {
4820 }
4821 
4822 /**
4823  * si_vm_decode_fault - print human readable fault info
4824  *
4825  * @rdev: radeon_device pointer
4826  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4827  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4828  *
4829  * Print human readable fault information (SI).
4830  */
4831 static void si_vm_decode_fault(struct radeon_device *rdev,
4832 			       u32 status, u32 addr)
4833 {
4834 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4835 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4836 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4837 	const char *block;
4838 
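	/*
	 * The memory client IDs differ between Tahiti and the smaller SI
	 * parts, hence the two lookup tables below.
	 */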
4839 	if (rdev->family == CHIP_TAHITI) {
4840 		switch (mc_id) {
4841 		case 160:
4842 		case 144:
4843 		case 96:
4844 		case 80:
4845 		case 224:
4846 		case 208:
4847 		case 32:
4848 		case 16:
4849 			block = "CB";
4850 			break;
4851 		case 161:
4852 		case 145:
4853 		case 97:
4854 		case 81:
4855 		case 225:
4856 		case 209:
4857 		case 33:
4858 		case 17:
4859 			block = "CB_FMASK";
4860 			break;
4861 		case 162:
4862 		case 146:
4863 		case 98:
4864 		case 82:
4865 		case 226:
4866 		case 210:
4867 		case 34:
4868 		case 18:
4869 			block = "CB_CMASK";
4870 			break;
4871 		case 163:
4872 		case 147:
4873 		case 99:
4874 		case 83:
4875 		case 227:
4876 		case 211:
4877 		case 35:
4878 		case 19:
4879 			block = "CB_IMMED";
4880 			break;
4881 		case 164:
4882 		case 148:
4883 		case 100:
4884 		case 84:
4885 		case 228:
4886 		case 212:
4887 		case 36:
4888 		case 20:
4889 			block = "DB";
4890 			break;
4891 		case 165:
4892 		case 149:
4893 		case 101:
4894 		case 85:
4895 		case 229:
4896 		case 213:
4897 		case 37:
4898 		case 21:
4899 			block = "DB_HTILE";
4900 			break;
4901 		case 167:
4902 		case 151:
4903 		case 103:
4904 		case 87:
4905 		case 231:
4906 		case 215:
4907 		case 39:
4908 		case 23:
4909 			block = "DB_STEN";
4910 			break;
4911 		case 72:
4912 		case 68:
4913 		case 64:
4914 		case 8:
4915 		case 4:
4916 		case 0:
4917 		case 136:
4918 		case 132:
4919 		case 128:
4920 		case 200:
4921 		case 196:
4922 		case 192:
4923 			block = "TC";
4924 			break;
4925 		case 112:
4926 		case 48:
4927 			block = "CP";
4928 			break;
4929 		case 49:
4930 		case 177:
4931 		case 50:
4932 		case 178:
4933 			block = "SH";
4934 			break;
4935 		case 53:
4936 		case 190:
4937 			block = "VGT";
4938 			break;
4939 		case 117:
4940 			block = "IH";
4941 			break;
4942 		case 51:
4943 		case 115:
4944 			block = "RLC";
4945 			break;
4946 		case 119:
4947 		case 183:
4948 			block = "DMA0";
4949 			break;
4950 		case 61:
4951 			block = "DMA1";
4952 			break;
4953 		case 248:
4954 		case 120:
4955 			block = "HDP";
4956 			break;
4957 		default:
4958 			block = "unknown";
4959 			break;
4960 		}
4961 	} else {
4962 		switch (mc_id) {
4963 		case 32:
4964 		case 16:
4965 		case 96:
4966 		case 80:
4967 		case 160:
4968 		case 144:
4969 		case 224:
4970 		case 208:
4971 			block = "CB";
4972 			break;
4973 		case 33:
4974 		case 17:
4975 		case 97:
4976 		case 81:
4977 		case 161:
4978 		case 145:
4979 		case 225:
4980 		case 209:
4981 			block = "CB_FMASK";
4982 			break;
4983 		case 34:
4984 		case 18:
4985 		case 98:
4986 		case 82:
4987 		case 162:
4988 		case 146:
4989 		case 226:
4990 		case 210:
4991 			block = "CB_CMASK";
4992 			break;
4993 		case 35:
4994 		case 19:
4995 		case 99:
4996 		case 83:
4997 		case 163:
4998 		case 147:
4999 		case 227:
5000 		case 211:
5001 			block = "CB_IMMED";
5002 			break;
5003 		case 36:
5004 		case 20:
5005 		case 100:
5006 		case 84:
5007 		case 164:
5008 		case 148:
5009 		case 228:
5010 		case 212:
5011 			block = "DB";
5012 			break;
5013 		case 37:
5014 		case 21:
5015 		case 101:
5016 		case 85:
5017 		case 165:
5018 		case 149:
5019 		case 229:
5020 		case 213:
5021 			block = "DB_HTILE";
5022 			break;
5023 		case 39:
5024 		case 23:
5025 		case 103:
5026 		case 87:
5027 		case 167:
5028 		case 151:
5029 		case 231:
5030 		case 215:
5031 			block = "DB_STEN";
5032 			break;
5033 		case 72:
5034 		case 68:
5035 		case 8:
5036 		case 4:
5037 		case 136:
5038 		case 132:
5039 		case 200:
5040 		case 196:
5041 			block = "TC";
5042 			break;
5043 		case 112:
5044 		case 48:
5045 			block = "CP";
5046 			break;
5047 		case 49:
5048 		case 177:
5049 		case 50:
5050 		case 178:
5051 			block = "SH";
5052 			break;
5053 		case 53:
5054 			block = "VGT";
5055 			break;
5056 		case 117:
5057 			block = "IH";
5058 			break;
5059 		case 51:
5060 		case 115:
5061 			block = "RLC";
5062 			break;
5063 		case 119:
5064 		case 183:
5065 			block = "DMA0";
5066 			break;
5067 		case 61:
5068 			block = "DMA1";
5069 			break;
5070 		case 248:
5071 		case 120:
5072 			block = "HDP";
5073 			break;
5074 		default:
5075 			block = "unknown";
5076 			break;
5077 		}
5078 	}
5079 
5080 	printk("VM fault (0x%02x, vmid %u) at page %u, %s from %s (%u)\n",
5081 	       protections, vmid, addr,
5082 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5083 	       block, mc_id);
5084 }
5085 
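/**
 * si_vm_flush - flush the TLB for a requested VM via the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ring: ring to emit the flush on
 * @vm_id: VM id to flush (0-15)
 * @pd_addr: page directory base address
 *
 * Update the page table base address for the requested VM, flush the
 * HDP cache, request a TLB invalidate for that VM and wait on the
 * invalidate request, then sync the PFP to the ME so stale PFP reads
 * are avoided (SI).
 */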
5086 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5087 		 unsigned vm_id, uint64_t pd_addr)
5088 {
5089 	/* write new base address */
5090 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5091 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5092 				 WRITE_DATA_DST_SEL(0)));
5093 
5094 	if (vm_id < 8) {
5095 		radeon_ring_write(ring,
5096 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5097 	} else {
5098 		radeon_ring_write(ring,
5099 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5100 	}
5101 	radeon_ring_write(ring, 0);
5102 	radeon_ring_write(ring, pd_addr >> 12);
5103 
5104 	/* flush hdp cache */
5105 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5106 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5107 				 WRITE_DATA_DST_SEL(0)));
5108 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5109 	radeon_ring_write(ring, 0);
5110 	radeon_ring_write(ring, 0x1);
5111 
5112 	/* bits 0-15 are the VM contexts 0-15 */
5113 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5114 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5115 				 WRITE_DATA_DST_SEL(0)));
5116 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5117 	radeon_ring_write(ring, 0);
5118 	radeon_ring_write(ring, 1 << vm_id);
5119 
5120 	/* wait for the invalidate to complete */
5121 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5122 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5123 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5124 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5125 	radeon_ring_write(ring, 0);
5126 	radeon_ring_write(ring, 0); /* ref */
5127 	radeon_ring_write(ring, 0); /* mask */
5128 	radeon_ring_write(ring, 0x20); /* poll interval */
5129 
5130 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5131 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5132 	radeon_ring_write(ring, 0x0);
5133 }
5134 
5135 /*
5136  *  Power and clock gating
5137  */
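/**
 * si_wait_for_rlc_serdes - wait for the RLC serdes to go idle
 *
 * @rdev: radeon_device pointer
 *
 * Poll the two RLC_SERDES_MASTER_BUSY registers until they read back
 * zero, giving up after rdev->usec_timeout microseconds each (SI).
 */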
5138 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5139 {
5140 	int i;
5141 
5142 	for (i = 0; i < rdev->usec_timeout; i++) {
5143 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5144 			break;
5145 		udelay(1);
5146 	}
5147 
5148 	for (i = 0; i < rdev->usec_timeout; i++) {
5149 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5150 			break;
5151 		udelay(1);
5152 	}
5153 }
5154 
5155 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5156 					 bool enable)
5157 {
5158 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5159 	u32 mask;
5160 	int i;
5161 
5162 	if (enable)
5163 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5164 	else
5165 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5166 	WREG32(CP_INT_CNTL_RING0, tmp);
5167 
5168 	if (!enable) {
5169 		/* read a gfx register */
5170 		tmp = RREG32(DB_DEPTH_INFO);
5171 
5172 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5173 		for (i = 0; i < rdev->usec_timeout; i++) {
5174 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5175 				break;
5176 			udelay(1);
5177 		}
5178 	}
5179 }
5180 
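/**
 * si_set_uvd_dcm - set the UVD dynamic clock mode
 *
 * @rdev: radeon_device pointer
 * @sw_mode: true to use the software-controlled divider path
 *
 * Enable UVD dynamic clocking (DCM) and program the divider fields in
 * UVD_CGC_CTRL/UVD_CGC_CTRL2 for either the software or the hardware
 * controlled path (SI).
 */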
5181 static void si_set_uvd_dcm(struct radeon_device *rdev,
5182 			   bool sw_mode)
5183 {
5184 	u32 tmp, tmp2;
5185 
5186 	tmp = RREG32(UVD_CGC_CTRL);
5187 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5188 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5189 
5190 	if (sw_mode) {
5191 		tmp &= ~0x7ffff800;
5192 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5193 	} else {
5194 		tmp |= 0x7ffff800;
5195 		tmp2 = 0;
5196 	}
5197 
5198 	WREG32(UVD_CGC_CTRL, tmp);
5199 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5200 }
5201 
5202 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5203 {
5204 	bool hw_mode = true;
5205 
5206 	if (hw_mode) {
5207 		si_set_uvd_dcm(rdev, false);
5208 	} else {
5209 		u32 tmp = RREG32(UVD_CGC_CTRL);
5210 		tmp &= ~DCM;
5211 		WREG32(UVD_CGC_CTRL, tmp);
5212 	}
5213 }
5214 
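/**
 * si_halt_rlc - stop the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Disable the RLC if it is currently enabled and wait for the serdes
 * to go idle.  Returns the previous RLC_CNTL value so the caller can
 * restore it later with si_update_rlc() (SI).
 */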
5215 static u32 si_halt_rlc(struct radeon_device *rdev)
5216 {
5217 	u32 data, orig;
5218 
5219 	orig = data = RREG32(RLC_CNTL);
5220 
5221 	if (data & RLC_ENABLE) {
5222 		data &= ~RLC_ENABLE;
5223 		WREG32(RLC_CNTL, data);
5224 
5225 		si_wait_for_rlc_serdes(rdev);
5226 	}
5227 
5228 	return orig;
5229 }
5230 
5231 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5232 {
5233 	u32 tmp;
5234 
5235 	tmp = RREG32(RLC_CNTL);
5236 	if (tmp != rlc)
5237 		WREG32(RLC_CNTL, rlc);
5238 }
5239 
5240 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5241 {
5242 	u32 data, orig;
5243 
5244 	orig = data = RREG32(DMA_PG);
5245 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5246 		data |= PG_CNTL_ENABLE;
5247 	else
5248 		data &= ~PG_CNTL_ENABLE;
5249 	if (orig != data)
5250 		WREG32(DMA_PG, data);
5251 }
5252 
5253 static void si_init_dma_pg(struct radeon_device *rdev)
5254 {
5255 	u32 tmp;
5256 
5257 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5258 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5259 
5260 	for (tmp = 0; tmp < 5; tmp++)
5261 		WREG32(DMA_PGFSM_WRITE, 0);
5262 }
5263 
5264 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5265 			       bool enable)
5266 {
5267 	u32 tmp;
5268 
5269 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5270 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5271 		WREG32(RLC_TTOP_D, tmp);
5272 
5273 		tmp = RREG32(RLC_PG_CNTL);
5274 		tmp |= GFX_PG_ENABLE;
5275 		WREG32(RLC_PG_CNTL, tmp);
5276 
5277 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5278 		tmp |= AUTO_PG_EN;
5279 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5280 	} else {
5281 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5282 		tmp &= ~AUTO_PG_EN;
5283 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5284 
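		/* dummy gfx register read, presumably to make sure the gfx
		 * block is awake (same pattern as si_enable_gui_idle_interrupt())
		 */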
5285 		tmp = RREG32(DB_RENDER_CONTROL);
5286 	}
5287 }
5288 
5289 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5290 {
5291 	u32 tmp;
5292 
5293 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5294 
5295 	tmp = RREG32(RLC_PG_CNTL);
5296 	tmp |= GFX_PG_SRC;
5297 	WREG32(RLC_PG_CNTL, tmp);
5298 
5299 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5300 
5301 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5302 
5303 	tmp &= ~GRBM_REG_SGIT_MASK;
5304 	tmp |= GRBM_REG_SGIT(0x700);
5305 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5306 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5307 }
5308 
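/**
 * si_get_cu_active_bitmap - get the bitmap of active CUs
 *
 * @rdev: radeon_device pointer
 * @se: shader engine to query
 * @sh: shader array to query
 *
 * Combine the fuse and user shader array configuration registers for
 * the selected SE/SH and return a bitmap with one set bit per active
 * compute unit (SI).
 */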
5309 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5310 {
5311 	u32 mask = 0, tmp, tmp1;
5312 	int i;
5313 
5314 	si_select_se_sh(rdev, se, sh);
5315 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5316 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5317 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5318 
5319 	tmp &= 0xffff0000;
5320 
5321 	tmp |= tmp1;
5322 	tmp >>= 16;
5323 
5324 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
5325 		mask <<= 1;
5326 		mask |= 1;
5327 	}
5328 
5329 	return (~tmp) & mask;
5330 }
5331 
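/**
 * si_init_ao_cu_mask - set up the always-on CU mask for powergating
 *
 * @rdev: radeon_device pointer
 *
 * Mark up to two active CUs per shader array as always-on in
 * RLC_PG_AO_CU_MASK and program the total number of active CUs into
 * RLC_MAX_PG_CU (SI).
 */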
5332 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5333 {
5334 	u32 i, j, k, active_cu_number = 0;
5335 	u32 mask, counter, cu_bitmap;
5336 	u32 tmp = 0;
5337 
5338 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5339 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5340 			mask = 1;
5341 			cu_bitmap = 0;
5342 			counter  = 0;
5343 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5344 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5345 					if (counter < 2)
5346 						cu_bitmap |= mask;
5347 					counter++;
5348 				}
5349 				mask <<= 1;
5350 			}
5351 
5352 			active_cu_number += counter;
5353 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5354 		}
5355 	}
5356 
5357 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5358 
5359 	tmp = RREG32(RLC_MAX_PG_CU);
5360 	tmp &= ~MAX_PU_CU_MASK;
5361 	tmp |= MAX_PU_CU(active_cu_number);
5362 	WREG32(RLC_MAX_PG_CU, tmp);
5363 }
5364 
5365 static void si_enable_cgcg(struct radeon_device *rdev,
5366 			   bool enable)
5367 {
5368 	u32 data, orig, tmp;
5369 
5370 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5371 
5372 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5373 		si_enable_gui_idle_interrupt(rdev, true);
5374 
5375 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5376 
5377 		tmp = si_halt_rlc(rdev);
5378 
5379 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5380 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5381 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5382 
5383 		si_wait_for_rlc_serdes(rdev);
5384 
5385 		si_update_rlc(rdev, tmp);
5386 
5387 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5388 
5389 		data |= CGCG_EN | CGLS_EN;
5390 	} else {
5391 		si_enable_gui_idle_interrupt(rdev, false);
5392 
5393 		RREG32(CB_CGTT_SCLK_CTRL);
5394 		RREG32(CB_CGTT_SCLK_CTRL);
5395 		RREG32(CB_CGTT_SCLK_CTRL);
5396 		RREG32(CB_CGTT_SCLK_CTRL);
5397 
5398 		data &= ~(CGCG_EN | CGLS_EN);
5399 	}
5400 
5401 	if (orig != data)
5402 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5403 }
5404 
5405 static void si_enable_mgcg(struct radeon_device *rdev,
5406 			   bool enable)
5407 {
5408 	u32 data, orig, tmp = 0;
5409 
5410 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5411 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5412 		data = 0x96940200;
5413 		if (orig != data)
5414 			WREG32(CGTS_SM_CTRL_REG, data);
5415 
5416 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5417 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5418 			data |= CP_MEM_LS_EN;
5419 			if (orig != data)
5420 				WREG32(CP_MEM_SLP_CNTL, data);
5421 		}
5422 
5423 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5424 		data &= 0xffffffc0;
5425 		if (orig != data)
5426 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5427 
5428 		tmp = si_halt_rlc(rdev);
5429 
5430 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5431 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5432 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5433 
5434 		si_update_rlc(rdev, tmp);
5435 	} else {
5436 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5437 		data |= 0x00000003;
5438 		if (orig != data)
5439 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5440 
5441 		data = RREG32(CP_MEM_SLP_CNTL);
5442 		if (data & CP_MEM_LS_EN) {
5443 			data &= ~CP_MEM_LS_EN;
5444 			WREG32(CP_MEM_SLP_CNTL, data);
5445 		}
5446 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5447 		data |= LS_OVERRIDE | OVERRIDE;
5448 		if (orig != data)
5449 			WREG32(CGTS_SM_CTRL_REG, data);
5450 
5451 		tmp = si_halt_rlc(rdev);
5452 
5453 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5454 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5455 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5456 
5457 		si_update_rlc(rdev, tmp);
5458 	}
5459 }
5460 
5461 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5462 			       bool enable)
5463 {
5464 	u32 orig, data, tmp;
5465 
5466 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5467 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5468 		tmp |= 0x3fff;
5469 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5470 
5471 		orig = data = RREG32(UVD_CGC_CTRL);
5472 		data |= DCM;
5473 		if (orig != data)
5474 			WREG32(UVD_CGC_CTRL, data);
5475 
5476 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5477 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5478 	} else {
5479 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5480 		tmp &= ~0x3fff;
5481 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5482 
5483 		orig = data = RREG32(UVD_CGC_CTRL);
5484 		data &= ~DCM;
5485 		if (orig != data)
5486 			WREG32(UVD_CGC_CTRL, data);
5487 
5488 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5489 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5490 	}
5491 }
5492 
5493 static const u32 mc_cg_registers[] =
5494 {
5495 	MC_HUB_MISC_HUB_CG,
5496 	MC_HUB_MISC_SIP_CG,
5497 	MC_HUB_MISC_VM_CG,
5498 	MC_XPB_CLK_GAT,
5499 	ATC_MISC_CG,
5500 	MC_CITF_MISC_WR_CG,
5501 	MC_CITF_MISC_RD_CG,
5502 	MC_CITF_MISC_VM_CG,
5503 	VM_L2_CG,
5504 };
5505 
5506 static void si_enable_mc_ls(struct radeon_device *rdev,
5507 			    bool enable)
5508 {
5509 	int i;
5510 	u32 orig, data;
5511 
5512 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5513 		orig = data = RREG32(mc_cg_registers[i]);
5514 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5515 			data |= MC_LS_ENABLE;
5516 		else
5517 			data &= ~MC_LS_ENABLE;
5518 		if (data != orig)
5519 			WREG32(mc_cg_registers[i], data);
5520 	}
5521 }
5522 
5523 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5524 			       bool enable)
5525 {
5526 	int i;
5527 	u32 orig, data;
5528 
5529 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5530 		orig = data = RREG32(mc_cg_registers[i]);
5531 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5532 			data |= MC_CG_ENABLE;
5533 		else
5534 			data &= ~MC_CG_ENABLE;
5535 		if (data != orig)
5536 			WREG32(mc_cg_registers[i], data);
5537 	}
5538 }
5539 
5540 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5541 			       bool enable)
5542 {
5543 	u32 orig, data, offset;
5544 	int i;
5545 
5546 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5547 		for (i = 0; i < 2; i++) {
5548 			if (i == 0)
5549 				offset = DMA0_REGISTER_OFFSET;
5550 			else
5551 				offset = DMA1_REGISTER_OFFSET;
5552 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5553 			data &= ~MEM_POWER_OVERRIDE;
5554 			if (data != orig)
5555 				WREG32(DMA_POWER_CNTL + offset, data);
5556 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5557 		}
5558 	} else {
5559 		for (i = 0; i < 2; i++) {
5560 			if (i == 0)
5561 				offset = DMA0_REGISTER_OFFSET;
5562 			else
5563 				offset = DMA1_REGISTER_OFFSET;
5564 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5565 			data |= MEM_POWER_OVERRIDE;
5566 			if (data != orig)
5567 				WREG32(DMA_POWER_CNTL + offset, data);
5568 
5569 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5570 			data = 0xff000000;
5571 			if (data != orig)
5572 				WREG32(DMA_CLK_CTRL + offset, data);
5573 		}
5574 	}
5575 }
5576 
5577 static void si_enable_bif_mgls(struct radeon_device *rdev,
5578 			       bool enable)
5579 {
5580 	u32 orig, data;
5581 
5582 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5583 
5584 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5585 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5586 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5587 	else
5588 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5589 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5590 
5591 	if (orig != data)
5592 		WREG32_PCIE(PCIE_CNTL2, data);
5593 }
5594 
5595 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5596 			       bool enable)
5597 {
5598 	u32 orig, data;
5599 
5600 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5601 
5602 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5603 		data &= ~CLOCK_GATING_DIS;
5604 	else
5605 		data |= CLOCK_GATING_DIS;
5606 
5607 	if (orig != data)
5608 		WREG32(HDP_HOST_PATH_CNTL, data);
5609 }
5610 
5611 static void si_enable_hdp_ls(struct radeon_device *rdev,
5612 			     bool enable)
5613 {
5614 	u32 orig, data;
5615 
5616 	orig = data = RREG32(HDP_MEM_POWER_LS);
5617 
5618 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5619 		data |= HDP_LS_ENABLE;
5620 	else
5621 		data &= ~HDP_LS_ENABLE;
5622 
5623 	if (orig != data)
5624 		WREG32(HDP_MEM_POWER_LS, data);
5625 }
5626 
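/**
 * si_update_cg - enable/disable clock gating for a set of blocks
 *
 * @rdev: radeon_device pointer
 * @block: RADEON_CG_BLOCK_* mask of blocks to update
 * @enable: enable (true) or disable (false) clock gating
 *
 * Enable or disable clock gating for the requested IP blocks.  For
 * the GFX block the ordering matters: MGCG is enabled before CGCG
 * and disabled after it (SI).
 */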
5627 static void si_update_cg(struct radeon_device *rdev,
5628 			 u32 block, bool enable)
5629 {
5630 	if (block & RADEON_CG_BLOCK_GFX) {
5631 		si_enable_gui_idle_interrupt(rdev, false);
5632 		/* order matters! */
5633 		if (enable) {
5634 			si_enable_mgcg(rdev, true);
5635 			si_enable_cgcg(rdev, true);
5636 		} else {
5637 			si_enable_cgcg(rdev, false);
5638 			si_enable_mgcg(rdev, false);
5639 		}
5640 		si_enable_gui_idle_interrupt(rdev, true);
5641 	}
5642 
5643 	if (block & RADEON_CG_BLOCK_MC) {
5644 		si_enable_mc_mgcg(rdev, enable);
5645 		si_enable_mc_ls(rdev, enable);
5646 	}
5647 
5648 	if (block & RADEON_CG_BLOCK_SDMA) {
5649 		si_enable_dma_mgcg(rdev, enable);
5650 	}
5651 
5652 	if (block & RADEON_CG_BLOCK_BIF) {
5653 		si_enable_bif_mgls(rdev, enable);
5654 	}
5655 
5656 	if (block & RADEON_CG_BLOCK_UVD) {
5657 		if (rdev->has_uvd) {
5658 			si_enable_uvd_mgcg(rdev, enable);
5659 		}
5660 	}
5661 
5662 	if (block & RADEON_CG_BLOCK_HDP) {
5663 		si_enable_hdp_mgcg(rdev, enable);
5664 		si_enable_hdp_ls(rdev, enable);
5665 	}
5666 }
5667 
5668 static void si_init_cg(struct radeon_device *rdev)
5669 {
5670 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5671 			    RADEON_CG_BLOCK_MC |
5672 			    RADEON_CG_BLOCK_SDMA |
5673 			    RADEON_CG_BLOCK_BIF |
5674 			    RADEON_CG_BLOCK_HDP), true);
5675 	if (rdev->has_uvd) {
5676 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5677 		si_init_uvd_internal_cg(rdev);
5678 	}
5679 }
5680 
5681 static void si_fini_cg(struct radeon_device *rdev)
5682 {
5683 	if (rdev->has_uvd) {
5684 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5685 	}
5686 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5687 			    RADEON_CG_BLOCK_MC |
5688 			    RADEON_CG_BLOCK_SDMA |
5689 			    RADEON_CG_BLOCK_BIF |
5690 			    RADEON_CG_BLOCK_HDP), false);
5691 }
5692 
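/**
 * si_get_csb_size - get the size of the clear state buffer
 *
 * @rdev: radeon_device pointer
 *
 * Walk the clear state section/extent tables and return the number of
 * dwords si_get_csb_buffer() will emit, or 0 if there is no clear
 * state data (SI).
 */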
5693 u32 si_get_csb_size(struct radeon_device *rdev)
5694 {
5695 	u32 count = 0;
5696 	const struct cs_section_def *sect = NULL;
5697 	const struct cs_extent_def *ext = NULL;
5698 
5699 	if (rdev->rlc.cs_data == NULL)
5700 		return 0;
5701 
5702 	/* begin clear state */
5703 	count += 2;
5704 	/* context control state */
5705 	count += 3;
5706 
5707 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5708 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5709 			if (sect->id == SECT_CONTEXT)
5710 				count += 2 + ext->reg_count;
5711 			else
5712 				return 0;
5713 		}
5714 	}
5715 	/* pa_sc_raster_config */
5716 	count += 3;
5717 	/* end clear state */
5718 	count += 2;
5719 	/* clear state */
5720 	count += 2;
5721 
5722 	return count;
5723 }
5724 
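/**
 * si_get_csb_buffer - fill in the clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: buffer to fill, at least si_get_csb_size() dwords
 *
 * Emit the PM4 packets that set the default context register state:
 * preamble begin, context control, the SECT_CONTEXT extents, the
 * asic-specific PA_SC_RASTER_CONFIG value, preamble end and a final
 * CLEAR_STATE packet (SI).
 */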
5725 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5726 {
5727 	u32 count = 0, i;
5728 	const struct cs_section_def *sect = NULL;
5729 	const struct cs_extent_def *ext = NULL;
5730 
5731 	if (rdev->rlc.cs_data == NULL)
5732 		return;
5733 	if (buffer == NULL)
5734 		return;
5735 
5736 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5737 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5738 
5739 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5740 	buffer[count++] = cpu_to_le32(0x80000000);
5741 	buffer[count++] = cpu_to_le32(0x80000000);
5742 
5743 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5744 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5745 			if (sect->id == SECT_CONTEXT) {
5746 				buffer[count++] =
5747 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5748 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5749 				for (i = 0; i < ext->reg_count; i++)
5750 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5751 			} else {
5752 				return;
5753 			}
5754 		}
5755 	}
5756 
5757 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5758 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5759 	switch (rdev->family) {
5760 	case CHIP_TAHITI:
5761 	case CHIP_PITCAIRN:
5762 		buffer[count++] = cpu_to_le32(0x2a00126a);
5763 		break;
5764 	case CHIP_VERDE:
5765 		buffer[count++] = cpu_to_le32(0x0000124a);
5766 		break;
5767 	case CHIP_OLAND:
5768 		buffer[count++] = cpu_to_le32(0x00000082);
5769 		break;
5770 	case CHIP_HAINAN:
5771 		buffer[count++] = cpu_to_le32(0x00000000);
5772 		break;
5773 	default:
5774 		buffer[count++] = cpu_to_le32(0x00000000);
5775 		break;
5776 	}
5777 
5778 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5779 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5780 
5781 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5782 	buffer[count++] = cpu_to_le32(0);
5783 }
5784 
5785 static void si_init_pg(struct radeon_device *rdev)
5786 {
5787 	if (rdev->pg_flags) {
5788 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5789 			si_init_dma_pg(rdev);
5790 		}
5791 		si_init_ao_cu_mask(rdev);
5792 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5793 			si_init_gfx_cgpg(rdev);
5794 		} else {
5795 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5796 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5797 		}
5798 		si_enable_dma_pg(rdev, true);
5799 		si_enable_gfx_cgpg(rdev, true);
5800 	} else {
5801 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5802 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5803 	}
5804 }
5805 
5806 static void si_fini_pg(struct radeon_device *rdev)
5807 {
5808 	if (rdev->pg_flags) {
5809 		si_enable_dma_pg(rdev, false);
5810 		si_enable_gfx_cgpg(rdev, false);
5811 	}
5812 }
5813 
5814 /*
5815  * RLC
5816  */
5817 void si_rlc_reset(struct radeon_device *rdev)
5818 {
5819 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5820 
5821 	tmp |= SOFT_RESET_RLC;
5822 	WREG32(GRBM_SOFT_RESET, tmp);
5823 	udelay(50);
5824 	tmp &= ~SOFT_RESET_RLC;
5825 	WREG32(GRBM_SOFT_RESET, tmp);
5826 	udelay(50);
5827 }
5828 
5829 static void si_rlc_stop(struct radeon_device *rdev)
5830 {
5831 	WREG32(RLC_CNTL, 0);
5832 
5833 	si_enable_gui_idle_interrupt(rdev, false);
5834 
5835 	si_wait_for_rlc_serdes(rdev);
5836 }
5837 
5838 static void si_rlc_start(struct radeon_device *rdev)
5839 {
5840 	WREG32(RLC_CNTL, RLC_ENABLE);
5841 
5842 	si_enable_gui_idle_interrupt(rdev, true);
5843 
5844 	udelay(50);
5845 }
5846 
5847 static bool si_lbpw_supported(struct radeon_device *rdev)
5848 {
5849 	u32 tmp;
5850 
5851 	/* Enable LBPW only for DDR3 */
5852 	tmp = RREG32(MC_SEQ_MISC0);
5853 	if ((tmp & 0xF0000000) == 0xB0000000)
5854 		return true;
5855 	return false;
5856 }
5857 
5858 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5859 {
5860 	u32 tmp;
5861 
5862 	tmp = RREG32(RLC_LB_CNTL);
5863 	if (enable)
5864 		tmp |= LOAD_BALANCE_ENABLE;
5865 	else
5866 		tmp &= ~LOAD_BALANCE_ENABLE;
5867 	WREG32(RLC_LB_CNTL, tmp);
5868 
5869 	if (!enable) {
5870 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5871 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5872 	}
5873 }
5874 
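/**
 * si_rlc_resume - load the RLC ucode and start the RLC
 *
 * @rdev: radeon_device pointer
 *
 * Stop and reset the RLC, re-init power and clock gating, load the
 * RLC microcode (either the new header-based layout or the legacy
 * big-endian blob) and start the RLC again (SI).
 * Returns 0 on success, -EINVAL if no RLC firmware is loaded.
 */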
5875 static int si_rlc_resume(struct radeon_device *rdev)
5876 {
5877 	u32 i;
5878 
5879 	if (!rdev->rlc_fw)
5880 		return -EINVAL;
5881 
5882 	si_rlc_stop(rdev);
5883 
5884 	si_rlc_reset(rdev);
5885 
5886 	si_init_pg(rdev);
5887 
5888 	si_init_cg(rdev);
5889 
5890 	WREG32(RLC_RL_BASE, 0);
5891 	WREG32(RLC_RL_SIZE, 0);
5892 	WREG32(RLC_LB_CNTL, 0);
5893 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5894 	WREG32(RLC_LB_CNTR_INIT, 0);
5895 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5896 
5897 	WREG32(RLC_MC_CNTL, 0);
5898 	WREG32(RLC_UCODE_CNTL, 0);
5899 
5900 	if (rdev->new_fw) {
5901 		const struct rlc_firmware_header_v1_0 *hdr =
5902 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5903 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5904 		const __le32 *fw_data = (const __le32 *)
5905 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5906 
5907 		radeon_ucode_print_rlc_hdr(&hdr->header);
5908 
5909 		for (i = 0; i < fw_size; i++) {
5910 			WREG32(RLC_UCODE_ADDR, i);
5911 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5912 		}
5913 	} else {
5914 		const __be32 *fw_data =
5915 			(const __be32 *)rdev->rlc_fw->data;
5916 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5917 			WREG32(RLC_UCODE_ADDR, i);
5918 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5919 		}
5920 	}
5921 	WREG32(RLC_UCODE_ADDR, 0);
5922 
5923 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5924 
5925 	si_rlc_start(rdev);
5926 
5927 	return 0;
5928 }
5929 
5930 static void si_enable_interrupts(struct radeon_device *rdev)
5931 {
5932 	u32 ih_cntl = RREG32(IH_CNTL);
5933 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5934 
5935 	ih_cntl |= ENABLE_INTR;
5936 	ih_rb_cntl |= IH_RB_ENABLE;
5937 	WREG32(IH_CNTL, ih_cntl);
5938 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5939 	rdev->ih.enabled = true;
5940 }
5941 
5942 static void si_disable_interrupts(struct radeon_device *rdev)
5943 {
5944 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5945 	u32 ih_cntl = RREG32(IH_CNTL);
5946 
5947 	ih_rb_cntl &= ~IH_RB_ENABLE;
5948 	ih_cntl &= ~ENABLE_INTR;
5949 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5950 	WREG32(IH_CNTL, ih_cntl);
5951 	/* set rptr, wptr to 0 */
5952 	WREG32(IH_RB_RPTR, 0);
5953 	WREG32(IH_RB_WPTR, 0);
5954 	rdev->ih.enabled = false;
5955 	rdev->ih.rptr = 0;
5956 }
5957 
5958 static void si_disable_interrupt_state(struct radeon_device *rdev)
5959 {
5960 	u32 tmp;
5961 
5962 	tmp = RREG32(CP_INT_CNTL_RING0) &
5963 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5964 	WREG32(CP_INT_CNTL_RING0, tmp);
5965 	WREG32(CP_INT_CNTL_RING1, 0);
5966 	WREG32(CP_INT_CNTL_RING2, 0);
5967 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5968 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5969 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5970 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5971 	WREG32(GRBM_INT_CNTL, 0);
5972 	WREG32(SRBM_INT_CNTL, 0);
5973 	if (rdev->num_crtc >= 2) {
5974 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5975 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5976 	}
5977 	if (rdev->num_crtc >= 4) {
5978 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5979 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5980 	}
5981 	if (rdev->num_crtc >= 6) {
5982 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5983 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5984 	}
5985 
5986 	if (rdev->num_crtc >= 2) {
5987 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5988 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5989 	}
5990 	if (rdev->num_crtc >= 4) {
5991 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5992 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5993 	}
5994 	if (rdev->num_crtc >= 6) {
5995 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5996 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5997 	}
5998 
5999 	if (!ASIC_IS_NODCE(rdev)) {
6000 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6001 
6002 		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6003 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6004 		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6005 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6006 		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6007 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6008 		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6009 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6010 		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6011 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6012 		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6013 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6014 	}
6015 }
6016 
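/**
 * si_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate the IH ring buffer, resume the RLC, then program the
 * interrupt controller: ring base, size, writeback address and the
 * IH_CNTL defaults, leaving all sources disabled before interrupts
 * are finally enabled (SI).
 * Returns 0 for success, error for failure.
 */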
6017 static int si_irq_init(struct radeon_device *rdev)
6018 {
6019 	int ret = 0;
6020 	int rb_bufsz;
6021 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6022 
6023 	/* allocate ring */
6024 	ret = r600_ih_ring_alloc(rdev);
6025 	if (ret)
6026 		return ret;
6027 
6028 	/* disable irqs */
6029 	si_disable_interrupts(rdev);
6030 
6031 	/* init rlc */
6032 	ret = si_rlc_resume(rdev);
6033 	if (ret) {
6034 		r600_ih_ring_fini(rdev);
6035 		return ret;
6036 	}
6037 
6038 	/* setup interrupt control */
6039 	/* set dummy read address to ring address */
6040 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6041 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6042 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6043 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6044 	 */
6045 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6046 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6047 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6048 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6049 
6050 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6051 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6052 
6053 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6054 		      IH_WPTR_OVERFLOW_CLEAR |
6055 		      (rb_bufsz << 1));
6056 
6057 	if (rdev->wb.enabled)
6058 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6059 
6060 	/* set the writeback address whether it's enabled or not */
6061 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6062 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6063 
6064 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6065 
6066 	/* set rptr, wptr to 0 */
6067 	WREG32(IH_RB_RPTR, 0);
6068 	WREG32(IH_RB_WPTR, 0);
6069 
6070 	/* Default settings for IH_CNTL (disabled at first) */
6071 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6072 	/* RPTR_REARM only works if msi's are enabled */
6073 	if (rdev->msi_enabled)
6074 		ih_cntl |= RPTR_REARM;
6075 	WREG32(IH_CNTL, ih_cntl);
6076 
6077 	/* force the active interrupt state to all disabled */
6078 	si_disable_interrupt_state(rdev);
6079 
6080 	pci_set_master(rdev->pdev);
6081 
6082 	/* enable irqs */
6083 	si_enable_interrupts(rdev);
6084 
6085 	return ret;
6086 }
6087 
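/**
 * si_irq_set - program the interrupt enable sources
 *
 * @rdev: radeon_device pointer
 *
 * Enable the interrupt sources on the chip (CP rings, DMA engines,
 * vblank, hpd, thermal) according to the current rdev->irq state
 * (SI).
 * Returns 0 for success, -EINVAL if no interrupt handler is installed.
 */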
6088 int si_irq_set(struct radeon_device *rdev)
6089 {
6090 	u32 cp_int_cntl;
6091 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6092 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6093 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
6094 	u32 grbm_int_cntl = 0;
6095 	u32 dma_cntl, dma_cntl1;
6096 	u32 thermal_int = 0;
6097 
6098 	if (!rdev->irq.installed) {
6099 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6100 		return -EINVAL;
6101 	}
6102 	/* don't enable anything if the ih is disabled */
6103 	if (!rdev->ih.enabled) {
6104 		si_disable_interrupts(rdev);
6105 		/* force the active interrupt state to all disabled */
6106 		si_disable_interrupt_state(rdev);
6107 		return 0;
6108 	}
6109 
6110 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6111 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6112 
6113 	if (!ASIC_IS_NODCE(rdev)) {
6114 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6115 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6116 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6117 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6118 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6119 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6120 	}
6121 
6122 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6123 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6124 
6125 	thermal_int = RREG32(CG_THERMAL_INT) &
6126 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6127 
6128 	/* enable CP interrupts on all rings */
6129 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6130 		DRM_DEBUG("si_irq_set: sw int gfx\n");
6131 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6132 	}
6133 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6134 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6135 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6136 	}
6137 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6138 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6139 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6140 	}
6141 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6142 		DRM_DEBUG("si_irq_set: sw int dma\n");
6143 		dma_cntl |= TRAP_ENABLE;
6144 	}
6145 
6146 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6147 		DRM_DEBUG("si_irq_set: sw int dma1\n");
6148 		dma_cntl1 |= TRAP_ENABLE;
6149 	}
6150 	if (rdev->irq.crtc_vblank_int[0] ||
6151 	    atomic_read(&rdev->irq.pflip[0])) {
6152 		DRM_DEBUG("si_irq_set: vblank 0\n");
6153 		crtc1 |= VBLANK_INT_MASK;
6154 	}
6155 	if (rdev->irq.crtc_vblank_int[1] ||
6156 	    atomic_read(&rdev->irq.pflip[1])) {
6157 		DRM_DEBUG("si_irq_set: vblank 1\n");
6158 		crtc2 |= VBLANK_INT_MASK;
6159 	}
6160 	if (rdev->irq.crtc_vblank_int[2] ||
6161 	    atomic_read(&rdev->irq.pflip[2])) {
6162 		DRM_DEBUG("si_irq_set: vblank 2\n");
6163 		crtc3 |= VBLANK_INT_MASK;
6164 	}
6165 	if (rdev->irq.crtc_vblank_int[3] ||
6166 	    atomic_read(&rdev->irq.pflip[3])) {
6167 		DRM_DEBUG("si_irq_set: vblank 3\n");
6168 		crtc4 |= VBLANK_INT_MASK;
6169 	}
6170 	if (rdev->irq.crtc_vblank_int[4] ||
6171 	    atomic_read(&rdev->irq.pflip[4])) {
6172 		DRM_DEBUG("si_irq_set: vblank 4\n");
6173 		crtc5 |= VBLANK_INT_MASK;
6174 	}
6175 	if (rdev->irq.crtc_vblank_int[5] ||
6176 	    atomic_read(&rdev->irq.pflip[5])) {
6177 		DRM_DEBUG("si_irq_set: vblank 5\n");
6178 		crtc6 |= VBLANK_INT_MASK;
6179 	}
6180 	if (rdev->irq.hpd[0]) {
6181 		DRM_DEBUG("si_irq_set: hpd 1\n");
6182 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6183 	}
6184 	if (rdev->irq.hpd[1]) {
6185 		DRM_DEBUG("si_irq_set: hpd 2\n");
6186 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6187 	}
6188 	if (rdev->irq.hpd[2]) {
6189 		DRM_DEBUG("si_irq_set: hpd 3\n");
6190 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6191 	}
6192 	if (rdev->irq.hpd[3]) {
6193 		DRM_DEBUG("si_irq_set: hpd 4\n");
6194 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6195 	}
6196 	if (rdev->irq.hpd[4]) {
6197 		DRM_DEBUG("si_irq_set: hpd 5\n");
6198 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6199 	}
6200 	if (rdev->irq.hpd[5]) {
6201 		DRM_DEBUG("si_irq_set: hpd 6\n");
6202 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6203 	}
6204 
6205 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6206 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6207 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6208 
6209 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6210 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6211 
6212 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6213 
6214 	if (rdev->irq.dpm_thermal) {
6215 		DRM_DEBUG("dpm thermal\n");
6216 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6217 	}
6218 
6219 	if (rdev->num_crtc >= 2) {
6220 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6221 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6222 	}
6223 	if (rdev->num_crtc >= 4) {
6224 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6225 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6226 	}
6227 	if (rdev->num_crtc >= 6) {
6228 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6229 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6230 	}
6231 
6232 	if (rdev->num_crtc >= 2) {
6233 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6234 		       GRPH_PFLIP_INT_MASK);
6235 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6236 		       GRPH_PFLIP_INT_MASK);
6237 	}
6238 	if (rdev->num_crtc >= 4) {
6239 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6240 		       GRPH_PFLIP_INT_MASK);
6241 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6242 		       GRPH_PFLIP_INT_MASK);
6243 	}
6244 	if (rdev->num_crtc >= 6) {
6245 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6246 		       GRPH_PFLIP_INT_MASK);
6247 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6248 		       GRPH_PFLIP_INT_MASK);
6249 	}
6250 
6251 	if (!ASIC_IS_NODCE(rdev)) {
6252 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
6253 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
6254 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
6255 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
6256 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
6257 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
6258 	}
6259 
6260 	WREG32(CG_THERMAL_INT, thermal_int);
6261 
6262 	/* posting read */
6263 	RREG32(SRBM_STATUS);
6264 
6265 	return 0;
6266 }
6267 
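/**
 * si_irq_ack - latch and acknowledge the display interrupt status
 *
 * @rdev: radeon_device pointer
 *
 * Snapshot the display interrupt status registers into
 * rdev->irq.stat_regs, then write back the ack bits for any pending
 * pageflip, vblank, vline, hpd and hpd-rx interrupts (SI).
 */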
6268 static inline void si_irq_ack(struct radeon_device *rdev)
6269 {
6270 	u32 tmp;
6271 
6272 	if (ASIC_IS_NODCE(rdev))
6273 		return;
6274 
6275 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6276 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6277 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6278 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6279 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6280 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6281 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6282 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6283 	if (rdev->num_crtc >= 4) {
6284 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6285 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6286 	}
6287 	if (rdev->num_crtc >= 6) {
6288 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6289 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6290 	}
6291 
6292 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6293 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6294 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6295 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6296 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6297 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6298 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6299 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6300 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6301 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6302 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6303 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6304 
6305 	if (rdev->num_crtc >= 4) {
6306 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6307 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6308 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6309 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6310 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6311 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6312 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6313 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6314 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6315 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6316 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6317 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6318 	}
6319 
6320 	if (rdev->num_crtc >= 6) {
6321 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6322 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6323 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6324 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6325 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6326 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6327 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6328 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6329 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6330 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6331 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6332 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6333 	}
6334 
6335 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6336 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6337 		tmp |= DC_HPDx_INT_ACK;
6338 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6339 	}
6340 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6341 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6342 		tmp |= DC_HPDx_INT_ACK;
6343 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6344 	}
6345 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6346 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6347 		tmp |= DC_HPDx_INT_ACK;
6348 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6349 	}
6350 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6351 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6352 		tmp |= DC_HPDx_INT_ACK;
6353 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6354 	}
6355 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6356 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6357 		tmp |= DC_HPDx_INT_ACK;
6358 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6359 	}
6360 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6361 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6362 		tmp |= DC_HPDx_INT_ACK;
6363 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6364 	}
6365 
6366 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
6367 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6368 		tmp |= DC_HPDx_RX_INT_ACK;
6369 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6370 	}
6371 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
6372 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6373 		tmp |= DC_HPDx_RX_INT_ACK;
6374 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6375 	}
6376 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
6377 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6378 		tmp |= DC_HPDx_RX_INT_ACK;
6379 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6380 	}
6381 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
6382 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6383 		tmp |= DC_HPDx_RX_INT_ACK;
6384 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6385 	}
6386 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
6387 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6388 		tmp |= DC_HPDx_RX_INT_ACK;
6389 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6390 	}
6391 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
6392 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6393 		tmp |= DC_HPDx_RX_INT_ACK;
6394 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6395 	}
6396 }
6397 
6398 static void si_irq_disable(struct radeon_device *rdev)
6399 {
6400 	si_disable_interrupts(rdev);
6401 	/* Wait and acknowledge irq */
6402 	mdelay(1);
6403 	si_irq_ack(rdev);
6404 	si_disable_interrupt_state(rdev);
6405 }
6406 
6407 static void si_irq_suspend(struct radeon_device *rdev)
6408 {
6409 	si_irq_disable(rdev);
6410 	si_rlc_stop(rdev);
6411 }
6412 
6413 static void si_irq_fini(struct radeon_device *rdev)
6414 {
6415 	si_irq_suspend(rdev);
6416 	r600_ih_ring_fini(rdev);
6417 }
6418 
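/**
 * si_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Read the IH ring buffer wptr, from the writeback page if enabled,
 * otherwise from the register.  On overflow, warn, restart processing
 * 16 bytes past the wptr (the oldest entry not overwritten) and clear
 * the overflow flag.  Returns the wptr masked to the ring size (SI).
 */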
6419 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6420 {
6421 	u32 wptr, tmp;
6422 
6423 	if (rdev->wb.enabled)
6424 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6425 	else
6426 		wptr = RREG32(IH_RB_WPTR);
6427 
6428 	if (wptr & RB_OVERFLOW) {
6429 		wptr &= ~RB_OVERFLOW;
6430 		/* When a ring buffer overflow happens, start parsing interrupts
6431 		 * from the last vector that was not overwritten (wptr + 16).
6432 		 * Hopefully this allows us to catch up.
6433 		 */
6434 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6435 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6436 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6437 		tmp = RREG32(IH_RB_CNTL);
6438 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6439 		WREG32(IH_RB_CNTL, tmp);
6440 	}
6441 	return (wptr & rdev->ih.ptr_mask);
6442 }
6443 
6444 /* SI IV Ring
6445  * Each IV ring entry is 128 bits:
6446  * [7:0]    - interrupt source id
6447  * [31:8]   - reserved
6448  * [59:32]  - interrupt source data
6449  * [63:60]  - reserved
6450  * [71:64]  - RINGID
6451  * [79:72]  - VMID
6452  * [127:80] - reserved
6453  */
6454 irqreturn_t si_irq_process(struct radeon_device *rdev)
6455 {
6456 	u32 wptr;
6457 	u32 rptr;
6458 	u32 src_id, src_data, ring_id;
6459 	u32 ring_index;
6460 	bool queue_hotplug = false;
6461 	bool queue_dp = false;
6462 	bool queue_thermal = false;
6463 	u32 status, addr;
6464 
6465 	if (!rdev->ih.enabled || rdev->shutdown)
6466 		return IRQ_NONE;
6467 
6468 	wptr = si_get_ih_wptr(rdev);
6469 
6470 restart_ih:
6471 	/* is somebody else already processing irqs? */
6472 	if (atomic_xchg(&rdev->ih.lock, 1))
6473 		return IRQ_NONE;
6474 
6475 	rptr = rdev->ih.rptr;
6476 	DRM_DEBUG_VBLANK("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6477 
6478 	/* Order reading of wptr vs. reading of IH ring data */
6479 	rmb();
6480 
6481 	/* display interrupts */
6482 	si_irq_ack(rdev);
6483 
6484 	while (rptr != wptr) {
6485 		/* wptr/rptr are in bytes! */
6486 		ring_index = rptr / 4;
6487 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6488 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6489 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6490 
6491 		switch (src_id) {
6492 		case 1: /* D1 vblank/vline */
6493 			switch (src_data) {
6494 			case 0: /* D1 vblank */
6495 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
6496 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6497 
6498 				if (rdev->irq.crtc_vblank_int[0]) {
6499 					drm_handle_vblank(rdev->ddev, 0);
6500 					rdev->pm.vblank_sync = true;
6501 					wake_up(&rdev->irq.vblank_queue);
6502 				}
6503 				if (atomic_read(&rdev->irq.pflip[0]))
6504 					radeon_crtc_handle_vblank(rdev, 0);
6505 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6506 				DRM_DEBUG_VBLANK("IH: D1 vblank\n");
6507 
6508 				break;
6509 			case 1: /* D1 vline */
6510 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
6511 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6512 
6513 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6514 				DRM_DEBUG_VBLANK("IH: D1 vline\n");
6515 
6516 				break;
6517 			default:
6518 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6519 				break;
6520 			}
6521 			break;
6522 		case 2: /* D2 vblank/vline */
6523 			switch (src_data) {
6524 			case 0: /* D2 vblank */
6525 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
6526 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6527 
6528 				if (rdev->irq.crtc_vblank_int[1]) {
6529 					drm_handle_vblank(rdev->ddev, 1);
6530 					rdev->pm.vblank_sync = true;
6531 					wake_up(&rdev->irq.vblank_queue);
6532 				}
6533 				if (atomic_read(&rdev->irq.pflip[1]))
6534 					radeon_crtc_handle_vblank(rdev, 1);
6535 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6536 				DRM_DEBUG_VBLANK("IH: D2 vblank\n");
6537 
6538 				break;
6539 			case 1: /* D2 vline */
6540 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
6541 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6542 
6543 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6544 				DRM_DEBUG_VBLANK("IH: D2 vline\n");
6545 
6546 				break;
6547 			default:
6548 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6549 				break;
6550 			}
6551 			break;
6552 		case 3: /* D3 vblank/vline */
6553 			switch (src_data) {
6554 			case 0: /* D3 vblank */
6555 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
6556 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6557 
6558 				if (rdev->irq.crtc_vblank_int[2]) {
6559 					drm_handle_vblank(rdev->ddev, 2);
6560 					rdev->pm.vblank_sync = true;
6561 					wake_up(&rdev->irq.vblank_queue);
6562 				}
6563 				if (atomic_read(&rdev->irq.pflip[2]))
6564 					radeon_crtc_handle_vblank(rdev, 2);
6565 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6566 				DRM_DEBUG_VBLANK("IH: D3 vblank\n");
6567 
6568 				break;
6569 			case 1: /* D3 vline */
6570 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
6571 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6572 
6573 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6574 				DRM_DEBUG_VBLANK("IH: D3 vline\n");
6575 
6576 				break;
6577 			default:
6578 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6579 				break;
6580 			}
6581 			break;
6582 		case 4: /* D4 vblank/vline */
6583 			switch (src_data) {
6584 			case 0: /* D4 vblank */
6585 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
6586 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6587 
6588 				if (rdev->irq.crtc_vblank_int[3]) {
6589 					drm_handle_vblank(rdev->ddev, 3);
6590 					rdev->pm.vblank_sync = true;
6591 					wake_up(&rdev->irq.vblank_queue);
6592 				}
6593 				if (atomic_read(&rdev->irq.pflip[3]))
6594 					radeon_crtc_handle_vblank(rdev, 3);
6595 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6596 				DRM_DEBUG_VBLANK("IH: D4 vblank\n");
6597 
6598 				break;
6599 			case 1: /* D4 vline */
6600 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
6601 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6602 
6603 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6604 				DRM_DEBUG_VBLANK("IH: D4 vline\n");
6605 
6606 				break;
6607 			default:
6608 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6609 				break;
6610 			}
6611 			break;
6612 		case 5: /* D5 vblank/vline */
6613 			switch (src_data) {
6614 			case 0: /* D5 vblank */
6615 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
6616 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6617 
6618 				if (rdev->irq.crtc_vblank_int[4]) {
6619 					drm_handle_vblank(rdev->ddev, 4);
6620 					rdev->pm.vblank_sync = true;
6621 					wake_up(&rdev->irq.vblank_queue);
6622 				}
6623 				if (atomic_read(&rdev->irq.pflip[4]))
6624 					radeon_crtc_handle_vblank(rdev, 4);
6625 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6626 				DRM_DEBUG_VBLANK("IH: D5 vblank\n");
6627 
6628 				break;
6629 			case 1: /* D5 vline */
6630 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
6631 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6632 
6633 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6634 				DRM_DEBUG_VBLANK("IH: D5 vline\n");
6635 
6636 				break;
6637 			default:
6638 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6639 				break;
6640 			}
6641 			break;
6642 		case 6: /* D6 vblank/vline */
6643 			switch (src_data) {
6644 			case 0: /* D6 vblank */
6645 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
6646 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6647 
6648 				if (rdev->irq.crtc_vblank_int[5]) {
6649 					drm_handle_vblank(rdev->ddev, 5);
6650 					rdev->pm.vblank_sync = true;
6651 					wake_up(&rdev->irq.vblank_queue);
6652 				}
6653 				if (atomic_read(&rdev->irq.pflip[5]))
6654 					radeon_crtc_handle_vblank(rdev, 5);
6655 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6656 				DRM_DEBUG_VBLANK("IH: D6 vblank\n");
6657 
6658 				break;
6659 			case 1: /* D6 vline */
6660 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
6661 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6662 
6663 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6664 				DRM_DEBUG_VBLANK("IH: D6 vline\n");
6665 
6666 				break;
6667 			default:
6668 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6669 				break;
6670 			}
6671 			break;
6672 		case 8: /* D1 page flip */
6673 		case 10: /* D2 page flip */
6674 		case 12: /* D3 page flip */
6675 		case 14: /* D4 page flip */
6676 		case 16: /* D5 page flip */
6677 		case 18: /* D6 page flip */
6678 			DRM_DEBUG_VBLANK("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6679 			if (radeon_use_pflipirq > 0)
6680 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6681 			break;
6682 		case 42: /* HPD hotplug */
6683 			switch (src_data) {
6684 			case 0:
6685 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
6686 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6687 
6688 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6689 				queue_hotplug = true;
6690 				DRM_DEBUG("IH: HPD1\n");
6691 
6692 				break;
6693 			case 1:
6694 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
6695 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6696 
6697 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6698 				queue_hotplug = true;
6699 				DRM_DEBUG("IH: HPD2\n");
6700 
6701 				break;
6702 			case 2:
6703 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
6704 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6705 
6706 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6707 				queue_hotplug = true;
6708 				DRM_DEBUG("IH: HPD3\n");
6709 
6710 				break;
6711 			case 3:
6712 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
6713 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6714 
6715 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6716 				queue_hotplug = true;
6717 				DRM_DEBUG("IH: HPD4\n");
6718 
6719 				break;
6720 			case 4:
6721 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
6722 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6723 
6724 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6725 				queue_hotplug = true;
6726 				DRM_DEBUG("IH: HPD5\n");
6727 
6728 				break;
6729 			case 5:
6730 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
6731 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6732 
6733 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6734 				queue_hotplug = true;
6735 				DRM_DEBUG("IH: HPD6\n");
6736 
6737 				break;
6738 			case 6:
6739 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
6740 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6741 
6742 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
6743 				queue_dp = true;
6744 				DRM_DEBUG("IH: HPD_RX 1\n");
6745 
6746 				break;
6747 			case 7:
6748 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
6749 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6750 
6751 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
6752 				queue_dp = true;
6753 				DRM_DEBUG("IH: HPD_RX 2\n");
6754 
6755 				break;
6756 			case 8:
6757 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
6758 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6759 
6760 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
6761 				queue_dp = true;
6762 				DRM_DEBUG("IH: HPD_RX 3\n");
6763 
6764 				break;
6765 			case 9:
6766 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
6767 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6768 
6769 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
6770 				queue_dp = true;
6771 				DRM_DEBUG("IH: HPD_RX 4\n");
6772 
6773 				break;
6774 			case 10:
6775 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
6776 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6777 
6778 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
6779 				queue_dp = true;
6780 				DRM_DEBUG("IH: HPD_RX 5\n");
6781 
6782 				break;
6783 			case 11:
6784 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
6785 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6786 
6787 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
6788 				queue_dp = true;
6789 				DRM_DEBUG("IH: HPD_RX 6\n");
6790 
6791 				break;
6792 			default:
6793 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6794 				break;
6795 			}
6796 			break;
6797 		case 96:
6798 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6799 			WREG32(SRBM_INT_ACK, 0x1);
6800 			break;
6801 		case 124: /* UVD */
6802 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6803 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6804 			break;
6805 		case 146:
6806 		case 147:
6807 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6808 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6809 			/* reset addr and status */
6810 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6811 			if (addr == 0x0 && status == 0x0)
6812 				break;
6813 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6814 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6815 				addr);
6816 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6817 				status);
6818 			si_vm_decode_fault(rdev, status, addr);
6819 			break;
6820 		case 176: /* RINGID0 CP_INT */
6821 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6822 			break;
6823 		case 177: /* RINGID1 CP_INT */
6824 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6825 			break;
6826 		case 178: /* RINGID2 CP_INT */
6827 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6828 			break;
6829 		case 181: /* CP EOP event */
6830 			DRM_DEBUG("IH: CP EOP\n");
6831 			switch (ring_id) {
6832 			case 0:
6833 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6834 				break;
6835 			case 1:
6836 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6837 				break;
6838 			case 2:
6839 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6840 				break;
6841 			}
6842 			break;
6843 		case 224: /* DMA trap event */
6844 			DRM_DEBUG("IH: DMA trap\n");
6845 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6846 			break;
6847 		case 230: /* thermal low to high */
6848 			DRM_DEBUG("IH: thermal low to high\n");
6849 			rdev->pm.dpm.thermal.high_to_low = false;
6850 			queue_thermal = true;
6851 			break;
6852 		case 231: /* thermal high to low */
6853 			DRM_DEBUG("IH: thermal high to low\n");
6854 			rdev->pm.dpm.thermal.high_to_low = true;
6855 			queue_thermal = true;
6856 			break;
6857 		case 233: /* GUI IDLE */
6858 			DRM_DEBUG("IH: GUI idle\n");
6859 			break;
6860 		case 244: /* DMA trap event */
6861 			DRM_DEBUG("IH: DMA1 trap\n");
6862 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6863 			break;
6864 		default:
6865 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6866 			break;
6867 		}
6868 
6869 		/* wptr/rptr are in bytes! */
6870 		rptr += 16;
6871 		rptr &= rdev->ih.ptr_mask;
6872 		WREG32(IH_RB_RPTR, rptr);
6873 	}
6874 	if (queue_dp)
6875 		schedule_work(&rdev->dp_work);
6876 	if (queue_hotplug)
6877 		schedule_delayed_work(&rdev->hotplug_work, 0);
6878 	if (queue_thermal && rdev->pm.dpm_enabled)
6879 		schedule_work(&rdev->pm.dpm.thermal.work);
6880 	rdev->ih.rptr = rptr;
6881 	atomic_set(&rdev->ih.lock, 0);
6882 
6883 	/* make sure wptr hasn't changed while processing */
6884 	wptr = si_get_ih_wptr(rdev);
6885 	if (wptr != rptr)
6886 		goto restart_ih;
6887 
6888 	return IRQ_HANDLED;
6889 }
6890 
6891 /*
6892  * startup/shutdown callbacks
6893  */
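/**
 * si_uvd_init - UVD software init
 *
 * @rdev: radeon_device pointer
 *
 * Initializes the UVD block and its ring (SI).
 * On failure, clears rdev->has_uvd so the remaining
 * UVD code paths are skipped.
 */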
6894 static void si_uvd_init(struct radeon_device *rdev)
6895 {
6896 	int r;
6897 
6898 	if (!rdev->has_uvd)
6899 		return;
6900 
6901 	r = radeon_uvd_init(rdev);
6902 	if (r) {
6903 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6904 		/*
6905 		 * At this point rdev->uvd.vcpu_bo is NULL, which causes
6906 		 * uvd_v2_2_resume() to fail early and do nothing there.
6907 		 * It is therefore pointless to go through that code, which
6908 		 * is why we disable UVD here.
6909 		 */
6910 		rdev->has_uvd = 0;
6911 		return;
6912 	}
6913 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6914 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6915 }
6916 
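/**
 * si_uvd_start - UVD hardware start
 *
 * @rdev: radeon_device pointer
 *
 * Resumes the UVD block and starts its fence driver (SI).
 * On failure, zeros the UVD ring size so later resume
 * code skips the ring.
 */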
6917 static void si_uvd_start(struct radeon_device *rdev)
6918 {
6919 	int r;
6920 
6921 	if (!rdev->has_uvd)
6922 		return;
6923 
6924 	r = uvd_v2_2_resume(rdev);
6925 	if (r) {
6926 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6927 		goto error;
6928 	}
6929 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6930 	if (r) {
6931 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6932 		goto error;
6933 	}
6934 	return;
6935 
6936 error:
6937 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6938 }
6939 
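/**
 * si_uvd_resume - UVD ring resume
 *
 * @rdev: radeon_device pointer
 *
 * Initializes the UVD ring and brings the UVD block
 * back up (SI).  Does nothing if UVD is disabled or
 * was not started.
 */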
6940 static void si_uvd_resume(struct radeon_device *rdev)
6941 {
6942 	struct radeon_ring *ring;
6943 	int r;
6944 
6945 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6946 		return;
6947 
6948 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6949 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6950 	if (r) {
6951 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6952 		return;
6953 	}
6954 	r = uvd_v1_0_init(rdev);
6955 	if (r) {
6956 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6957 		return;
6958 	}
6959 }
6960 
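/**
 * si_vce_init - VCE software init
 *
 * @rdev: radeon_device pointer
 *
 * Initializes the VCE block and its two rings (SI).
 * On failure, clears rdev->has_vce so the remaining
 * VCE code paths are skipped.
 */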
6961 static void si_vce_init(struct radeon_device *rdev)
6962 {
6963 	int r;
6964 
6965 	if (!rdev->has_vce)
6966 		return;
6967 
6968 	r = radeon_vce_init(rdev);
6969 	if (r) {
6970 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6971 		/*
6972 		 * At this point rdev->vce.vcpu_bo is NULL, which causes
6973 		 * si_vce_start() to fail early and do nothing there.
6974 		 * It is therefore pointless to go through that code, which
6975 		 * is why we disable VCE here.
6976 		 */
6977 		rdev->has_vce = 0;
6978 		return;
6979 	}
6980 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6981 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6982 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6983 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6984 }
6985 
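/**
 * si_vce_start - VCE hardware start
 *
 * @rdev: radeon_device pointer
 *
 * Resumes the VCE block and starts the fence drivers for
 * both VCE rings (SI).  On failure, zeros the ring sizes
 * so later resume code skips the rings.
 */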
6986 static void si_vce_start(struct radeon_device *rdev)
6987 {
6988 	int r;
6989 
6990 	if (!rdev->has_vce)
6991 		return;
6992 
6993 	r = radeon_vce_resume(rdev);
6994 	if (r) {
6995 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6996 		goto error;
6997 	}
6998 	r = vce_v1_0_resume(rdev);
6999 	if (r) {
7000 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
7001 		goto error;
7002 	}
7003 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
7004 	if (r) {
7005 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
7006 		goto error;
7007 	}
7008 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
7009 	if (r) {
7010 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
7011 		goto error;
7012 	}
7013 	return;
7014 
7015 error:
7016 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
7017 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
7018 }
7019 
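/**
 * si_vce_resume - VCE ring resume
 *
 * @rdev: radeon_device pointer
 *
 * Initializes both VCE rings and brings the VCE block
 * back up (SI).  Does nothing if VCE is disabled or
 * was not started.
 */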
7020 static void si_vce_resume(struct radeon_device *rdev)
7021 {
7022 	struct radeon_ring *ring;
7023 	int r;
7024 
7025 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
7026 		return;
7027 
7028 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
7029 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7030 	if (r) {
7031 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
7032 		return;
7033 	}
7034 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
7035 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7036 	if (r) {
7037 		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
7038 		return;
7039 	}
7040 	r = vce_v1_0_init(rdev);
7041 	if (r) {
7042 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
7043 		return;
7044 	}
7045 }
7046 
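/**
 * si_startup - program the ASIC to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the MC, GART, rings, interrupts and the various
 * acceleration blocks (SI).
 * Returns 0 for success, error for failure.
 */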
7047 static int si_startup(struct radeon_device *rdev)
7048 {
7049 	struct radeon_ring *ring;
7050 	int r;
7051 
7052 	/* enable pcie gen2/3 link */
7053 	si_pcie_gen3_enable(rdev);
7054 	/* enable aspm */
7055 	si_program_aspm(rdev);
7056 
7057 	/* scratch needs to be initialized before MC */
7058 	r = r600_vram_scratch_init(rdev);
7059 	if (r)
7060 		return r;
7061 
7062 	si_mc_program(rdev);
7063 
7064 	if (!rdev->pm.dpm_enabled) {
7065 		r = si_mc_load_microcode(rdev);
7066 		if (r) {
7067 			DRM_ERROR("Failed to load MC firmware!\n");
7068 			return r;
7069 		}
7070 	}
7071 
7072 	r = si_pcie_gart_enable(rdev);
7073 	if (r)
7074 		return r;
7075 	si_gpu_init(rdev);
7076 
7077 	/* allocate rlc buffers */
7078 	if (rdev->family == CHIP_VERDE) {
7079 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
7080 		rdev->rlc.reg_list_size =
7081 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
7082 	}
7083 	rdev->rlc.cs_data = si_cs_data;
7084 	r = sumo_rlc_init(rdev);
7085 	if (r) {
7086 		DRM_ERROR("Failed to init rlc BOs!\n");
7087 		return r;
7088 	}
7089 
7090 	/* allocate wb buffer */
7091 	r = radeon_wb_init(rdev);
7092 	if (r)
7093 		return r;
7094 
7095 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7096 	if (r) {
7097 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7098 		return r;
7099 	}
7100 
7101 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7102 	if (r) {
7103 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7104 		return r;
7105 	}
7106 
7107 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7108 	if (r) {
7109 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7110 		return r;
7111 	}
7112 
7113 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7114 	if (r) {
7115 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7116 		return r;
7117 	}
7118 
7119 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7120 	if (r) {
7121 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7122 		return r;
7123 	}
7124 
7125 	si_uvd_start(rdev);
7126 	si_vce_start(rdev);
7127 
7128 	/* Enable IRQ */
7129 	if (!rdev->irq.installed) {
7130 		r = radeon_irq_kms_init(rdev);
7131 		if (r)
7132 			return r;
7133 	}
7134 
7135 	r = si_irq_init(rdev);
7136 	if (r) {
7137 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7138 		radeon_irq_kms_fini(rdev);
7139 		return r;
7140 	}
7141 	si_irq_set(rdev);
7142 
7143 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7144 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7145 			     RADEON_CP_PACKET2);
7146 	if (r)
7147 		return r;
7148 
7149 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7150 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7151 			     RADEON_CP_PACKET2);
7152 	if (r)
7153 		return r;
7154 
7155 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7156 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7157 			     RADEON_CP_PACKET2);
7158 	if (r)
7159 		return r;
7160 
7161 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7162 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7163 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7164 	if (r)
7165 		return r;
7166 
7167 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7168 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7169 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7170 	if (r)
7171 		return r;
7172 
7173 	r = si_cp_load_microcode(rdev);
7174 	if (r)
7175 		return r;
7176 	r = si_cp_resume(rdev);
7177 	if (r)
7178 		return r;
7179 
7180 	r = cayman_dma_resume(rdev);
7181 	if (r)
7182 		return r;
7183 
7184 	si_uvd_resume(rdev);
7185 	si_vce_resume(rdev);
7186 
7187 	r = radeon_ib_pool_init(rdev);
7188 	if (r) {
7189 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7190 		return r;
7191 	}
7192 
7193 	r = radeon_vm_manager_init(rdev);
7194 	if (r) {
7195 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7196 		return r;
7197 	}
7198 
7199 	r = radeon_audio_init(rdev);
7200 	if (r)
7201 		return r;
7202 
7203 	return 0;
7204 }
7205 
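/**
 * si_resume - resume the ASIC
 *
 * @rdev: radeon_device pointer
 *
 * Re-posts the card and brings the hardware back up
 * after suspend (SI).
 * Returns 0 for success, error for failure.
 */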
7206 int si_resume(struct radeon_device *rdev)
7207 {
7208 	int r;
7209 
7210 	/* Do not reset the GPU before posting; on rv770 hw, unlike on r500
7211 	 * hw, posting performs the tasks necessary to bring the GPU back
7212 	 * into good shape.
7213 	 */
7214 	/* post card */
7215 	atom_asic_init(rdev->mode_info.atom_context);
7216 
7217 	/* init golden registers */
7218 	si_init_golden_registers(rdev);
7219 
7220 	if (rdev->pm.pm_method == PM_METHOD_DPM)
7221 		radeon_pm_resume(rdev);
7222 
7223 	rdev->accel_working = true;
7224 	r = si_startup(rdev);
7225 	if (r) {
7226 		DRM_ERROR("si startup failed on resume\n");
7227 		rdev->accel_working = false;
7228 		return r;
7229 	}
7230 
7231 	return r;
7232 
7233 }
7234 
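/**
 * si_suspend - suspend the ASIC
 *
 * @rdev: radeon_device pointer
 *
 * Stops the engines and disables interrupts and the GART
 * in preparation for suspend (SI).
 * Returns 0.
 */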
7235 int si_suspend(struct radeon_device *rdev)
7236 {
7237 	radeon_pm_suspend(rdev);
7238 	radeon_audio_fini(rdev);
7239 	radeon_vm_manager_fini(rdev);
7240 	si_cp_enable(rdev, false);
7241 	cayman_dma_stop(rdev);
7242 	if (rdev->has_uvd) {
7243 		uvd_v1_0_fini(rdev);
7244 		radeon_uvd_suspend(rdev);
7245 	}
7246 	if (rdev->has_vce)
7247 		radeon_vce_suspend(rdev);
7248 	si_fini_pg(rdev);
7249 	si_fini_cg(rdev);
7250 	si_irq_suspend(rdev);
7251 	radeon_wb_disable(rdev);
7252 	si_pcie_gart_disable(rdev);
7253 	return 0;
7254 }
7255 
7256 /* The plan is to move initialization into this function and use
7257  * helper functions so that radeon_device_init does little more
7258  * than call asic specific functions. This should also allow us
7259  * to remove a bunch of callback functions such as
7260  * vram_info.
7261  */
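/**
 * si_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Sets up the driver state and brings the hardware up (SI).
 * Called at driver load.
 * Returns 0 for success, error for failure.
 */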
7262 int si_init(struct radeon_device *rdev)
7263 {
7264 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7265 	int r;
7266 
7267 	/* Read BIOS */
7268 	if (!radeon_get_bios(rdev)) {
7269 		if (ASIC_IS_AVIVO(rdev))
7270 			return -EINVAL;
7271 	}
7272 	/* Must be an ATOMBIOS */
7273 	if (!rdev->is_atom_bios) {
7274 		dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
7275 		return -EINVAL;
7276 	}
7277 	r = radeon_atombios_init(rdev);
7278 	if (r)
7279 		return r;
7280 
7281 	/* Post card if necessary */
7282 	if (!radeon_card_posted(rdev)) {
7283 		if (!rdev->bios) {
7284 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7285 			return -EINVAL;
7286 		}
7287 		DRM_INFO("GPU not posted. posting now...\n");
7288 		atom_asic_init(rdev->mode_info.atom_context);
7289 	}
7290 	/* init golden registers */
7291 	si_init_golden_registers(rdev);
7292 	/* Initialize scratch registers */
7293 	si_scratch_init(rdev);
7294 	/* Initialize surface registers */
7295 	radeon_surface_init(rdev);
7296 	/* Initialize clocks */
7297 	radeon_get_clock_info(rdev->ddev);
7298 
7299 	/* Fence driver */
7300 	r = radeon_fence_driver_init(rdev);
7301 	if (r)
7302 		return r;
7303 
7304 	/* initialize memory controller */
7305 	r = si_mc_init(rdev);
7306 	if (r)
7307 		return r;
7308 	/* Memory manager */
7309 	r = radeon_bo_init(rdev);
7310 	if (r)
7311 		return r;
7312 
7313 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7314 	    !rdev->rlc_fw || !rdev->mc_fw) {
7315 		r = si_init_microcode(rdev);
7316 		if (r) {
7317 			DRM_ERROR("Failed to load firmware!\n");
7318 			return r;
7319 		}
7320 	}
7321 
7322 	/* Initialize power management */
7323 	radeon_pm_init(rdev);
7324 
7325 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7326 	ring->ring_obj = NULL;
7327 	r600_ring_init(rdev, ring, 1024 * 1024);
7328 
7329 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7330 	ring->ring_obj = NULL;
7331 	r600_ring_init(rdev, ring, 1024 * 1024);
7332 
7333 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7334 	ring->ring_obj = NULL;
7335 	r600_ring_init(rdev, ring, 1024 * 1024);
7336 
7337 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7338 	ring->ring_obj = NULL;
7339 	r600_ring_init(rdev, ring, 64 * 1024);
7340 
7341 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7342 	ring->ring_obj = NULL;
7343 	r600_ring_init(rdev, ring, 64 * 1024);
7344 
7345 	si_uvd_init(rdev);
7346 	si_vce_init(rdev);
7347 
7348 	rdev->ih.ring_obj = NULL;
7349 	r600_ih_ring_init(rdev, 64 * 1024);
7350 
7351 	r = r600_pcie_gart_init(rdev);
7352 	if (r)
7353 		return r;
7354 
7355 #ifdef __DragonFly__
7356 	/*
7357 	 * Some GLX operations (xfce 4.14) hang on SI hardware;
7358 	 * tell userland that acceleration is not working properly.
7359 	 */
7360 	rdev->accel_working = false;
7361 	DRM_ERROR("GPU acceleration disabled for now on DragonFly\n");
7362 #else
7363 	rdev->accel_working = true;
7364 #endif
7365 	r = si_startup(rdev);
7366 	if (r) {
7367 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7368 		si_cp_fini(rdev);
7369 		cayman_dma_fini(rdev);
7370 		si_irq_fini(rdev);
7371 		sumo_rlc_fini(rdev);
7372 		radeon_wb_fini(rdev);
7373 		radeon_ib_pool_fini(rdev);
7374 		radeon_vm_manager_fini(rdev);
7375 		radeon_irq_kms_fini(rdev);
7376 		si_pcie_gart_fini(rdev);
7377 		rdev->accel_working = false;
7378 	}
7379 
7380 	/* Don't start up if the MC ucode is missing.
7381 	 * The default clocks and voltages before the MC ucode
7382 	 * is loaded are not sufficient for advanced operations.
7383 	 */
7384 	if (!rdev->mc_fw) {
7385 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7386 		return -EINVAL;
7387 	}
7388 
7389 	return 0;
7390 }
7391 
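/**
 * si_fini - asic specific driver and hw tear down
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver state and shuts the hardware
 * down (SI).  Called at driver unload.
 */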
7392 void si_fini(struct radeon_device *rdev)
7393 {
7394 	radeon_pm_fini(rdev);
7395 	si_cp_fini(rdev);
7396 	cayman_dma_fini(rdev);
7397 	si_fini_pg(rdev);
7398 	si_fini_cg(rdev);
7399 	si_irq_fini(rdev);
7400 	sumo_rlc_fini(rdev);
7401 	radeon_wb_fini(rdev);
7402 	radeon_vm_manager_fini(rdev);
7403 	radeon_ib_pool_fini(rdev);
7404 	radeon_irq_kms_fini(rdev);
7405 	if (rdev->has_uvd) {
7406 		uvd_v1_0_fini(rdev);
7407 		radeon_uvd_fini(rdev);
7408 	}
7409 	if (rdev->has_vce)
7410 		radeon_vce_fini(rdev);
7411 	si_pcie_gart_fini(rdev);
7412 	r600_vram_scratch_fini(rdev);
7413 	radeon_gem_fini(rdev);
7414 	radeon_fence_driver_fini(rdev);
7415 	radeon_bo_fini(rdev);
7416 	radeon_atombios_fini(rdev);
7417 	si_fini_microcode(rdev);
7418 	kfree(rdev->bios);
7419 	rdev->bios = NULL;
7420 }
7421 
7422 /**
7423  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7424  *
7425  * @rdev: radeon_device pointer
7426  *
7427  * Fetches a GPU clock counter snapshot (SI).
7428  * Returns the 64 bit clock counter snapshot.
7429  */
7430 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7431 {
7432 	uint64_t clock;
7433 
7434 	mutex_lock(&rdev->gpu_clock_mutex);
7435 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7436 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7437 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7438 	mutex_unlock(&rdev->gpu_clock_mutex);
7439 	return clock;
7440 }
7441 
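/**
 * si_set_uvd_clocks - program the UVD PLL
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock
 * @dclk: requested UVD decoder clock
 *
 * Programs the UPLL dividers for the requested clocks (SI).
 * If either clock is zero, the PLL is left in bypass mode.
 * Returns 0 for success, error for failure.
 */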
7442 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7443 {
7444 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7445 	int r;
7446 
7447 	/* bypass vclk and dclk with bclk */
7448 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7449 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7450 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7451 
7452 	/* put PLL in bypass mode */
7453 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7454 
7455 	if (!vclk || !dclk) {
7456 		/* keep the Bypass mode */
7457 		return 0;
7458 	}
7459 
7460 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7461 					  16384, 0x03FFFFFF, 0, 128, 5,
7462 					  &fb_div, &vclk_div, &dclk_div);
7463 	if (r)
7464 		return r;
7465 
7466 	/* set RESET_ANTI_MUX to 0 */
7467 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7468 
7469 	/* set VCO_MODE to 1 */
7470 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7471 
7472 	/* disable sleep mode */
7473 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7474 
7475 	/* deassert UPLL_RESET */
7476 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7477 
7478 	mdelay(1);
7479 
7480 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7481 	if (r)
7482 		return r;
7483 
7484 	/* assert UPLL_RESET again */
7485 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7486 
7487 	/* disable spread spectrum. */
7488 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7489 
7490 	/* set feedback divider */
7491 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7492 
7493 	/* set ref divider to 0 */
7494 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7495 
7496 	if (fb_div < 307200)
7497 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7498 	else
7499 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7500 
7501 	/* set PDIV_A and PDIV_B */
7502 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7503 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7504 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7505 
7506 	/* give the PLL some time to settle */
7507 	mdelay(15);
7508 
7509 	/* deassert PLL_RESET */
7510 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7511 
7512 	mdelay(15);
7513 
7514 	/* switch from bypass mode to normal mode */
7515 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7516 
7517 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7518 	if (r)
7519 		return r;
7520 
7521 	/* switch VCLK and DCLK selection */
7522 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7523 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7524 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7525 
7526 	mdelay(100);
7527 
7528 	return 0;
7529 }
7530 
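/**
 * si_pcie_gen3_enable - enable higher pcie link speeds
 *
 * @rdev: radeon_device pointer
 *
 * Retrains the pcie link to gen2 or gen3 speeds when both
 * the GPU and the upstream bridge support them (SI).
 */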
7531 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7532 {
7533 	struct pci_dev *root = rdev->pdev->bus->self;
7534 	int bridge_pos, gpu_pos;
7535 	u32 speed_cntl, mask, current_data_rate;
7536 	int ret, i;
7537 	u16 tmp16;
7538 
7539 #if 0
7540 	if (pci_is_root_bus(rdev->pdev->bus))
7541 		return;
7542 #endif
7543 
7544 	if (radeon_pcie_gen2 == 0)
7545 		return;
7546 
7547 	if (rdev->flags & RADEON_IS_IGP)
7548 		return;
7549 
7550 	if (!(rdev->flags & RADEON_IS_PCIE))
7551 		return;
7552 
7553 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7554 	if (ret != 0)
7555 		return;
7556 
7557 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7558 		return;
7559 
7560 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7561 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7562 		LC_CURRENT_DATA_RATE_SHIFT;
7563 	if (mask & DRM_PCIE_SPEED_80) {
7564 		if (current_data_rate == 2) {
7565 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7566 			return;
7567 		}
7568 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7569 	} else if (mask & DRM_PCIE_SPEED_50) {
7570 		if (current_data_rate == 1) {
7571 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7572 			return;
7573 		}
7574 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7575 	}
7576 
7577 	bridge_pos = pci_pcie_cap(root);
7578 	if (!bridge_pos)
7579 		return;
7580 
7581 	gpu_pos = pci_pcie_cap(rdev->pdev);
7582 	if (!gpu_pos)
7583 		return;
7584 
7585 	if (mask & DRM_PCIE_SPEED_80) {
7586 		/* re-try equalization if gen3 is not already enabled */
7587 		if (current_data_rate != 2) {
7588 			u16 bridge_cfg, gpu_cfg;
7589 			u16 bridge_cfg2, gpu_cfg2;
7590 			u32 max_lw, current_lw, tmp;
7591 
7592 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7593 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7594 
7595 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7596 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7597 
7598 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7599 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7600 
7601 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7602 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7603 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7604 
7605 			if (current_lw < max_lw) {
7606 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7607 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7608 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7609 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7610 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7611 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7612 				}
7613 			}
7614 
7615 			for (i = 0; i < 10; i++) {
7616 				/* check status */
7617 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7618 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7619 					break;
7620 
7621 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7622 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7623 
7624 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7625 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7626 
7627 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7628 				tmp |= LC_SET_QUIESCE;
7629 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7630 
7631 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7632 				tmp |= LC_REDO_EQ;
7633 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7634 
7635 				mdelay(100);
7636 
7637 				/* linkctl */
7638 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7639 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7640 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7641 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7642 
7643 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7644 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7645 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7646 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7647 
7648 				/* linkctl2 */
7649 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7650 				tmp16 &= ~((1 << 4) | (7 << 9));
7651 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7652 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7653 
7654 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7655 				tmp16 &= ~((1 << 4) | (7 << 9));
7656 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7657 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7658 
7659 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7660 				tmp &= ~LC_SET_QUIESCE;
7661 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7662 			}
7663 		}
7664 	}
7665 
7666 	/* set the link speed */
7667 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7668 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7669 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7670 
7671 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7672 	tmp16 &= ~0xf;
7673 	if (mask & DRM_PCIE_SPEED_80)
7674 		tmp16 |= 3; /* gen3 */
7675 	else if (mask & DRM_PCIE_SPEED_50)
7676 		tmp16 |= 2; /* gen2 */
7677 	else
7678 		tmp16 |= 1; /* gen1 */
7679 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7680 
7681 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7682 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7683 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7684 
7685 	for (i = 0; i < rdev->usec_timeout; i++) {
7686 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7687 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7688 			break;
7689 		udelay(1);
7690 	}
7691 }
7692 
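/**
 * si_program_aspm - set up pcie ASPM
 *
 * @rdev: radeon_device pointer
 *
 * Configures the pcie Active State Power Management
 * (L0s/L1) and PLL powerdown settings (SI).
 */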
7693 static void si_program_aspm(struct radeon_device *rdev)
7694 {
7695 	u32 data, orig;
7696 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7697 #if 0
7698 	bool disable_clkreq = false;
7699 #endif
7700 
7701 	if (radeon_aspm == 0)
7702 		return;
7703 
7704 	if (!(rdev->flags & RADEON_IS_PCIE))
7705 		return;
7706 
7707 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7708 	data &= ~LC_XMIT_N_FTS_MASK;
7709 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7710 	if (orig != data)
7711 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7712 
7713 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7714 	data |= LC_GO_TO_RECOVERY;
7715 	if (orig != data)
7716 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7717 
7718 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7719 	data |= P_IGNORE_EDB_ERR;
7720 	if (orig != data)
7721 		WREG32_PCIE(PCIE_P_CNTL, data);
7722 
7723 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7724 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7725 	data |= LC_PMI_TO_L1_DIS;
7726 	if (!disable_l0s)
7727 		data |= LC_L0S_INACTIVITY(7);
7728 
7729 	if (!disable_l1) {
7730 		data |= LC_L1_INACTIVITY(7);
7731 		data &= ~LC_PMI_TO_L1_DIS;
7732 		if (orig != data)
7733 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7734 
7735 		if (!disable_plloff_in_l1) {
7736 			bool clk_req_support;
7737 
7738 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7739 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7740 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7741 			if (orig != data)
7742 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7743 
7744 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7745 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7746 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7747 			if (orig != data)
7748 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7749 
7750 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7751 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7752 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7753 			if (orig != data)
7754 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7755 
7756 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7757 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7758 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7759 			if (orig != data)
7760 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7761 
7762 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7763 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7764 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7765 				if (orig != data)
7766 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7767 
7768 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7769 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7770 				if (orig != data)
7771 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7772 
7773 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7774 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7775 				if (orig != data)
7776 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7777 
7778 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7779 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7780 				if (orig != data)
7781 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7782 
7783 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7784 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7785 				if (orig != data)
7786 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7787 
7788 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7789 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7790 				if (orig != data)
7791 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7792 
7793 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7794 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7795 				if (orig != data)
7796 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7797 
7798 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7799 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7800 				if (orig != data)
7801 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7802 			}
7803 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7804 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7805 			data |= LC_DYN_LANES_PWR_STATE(3);
7806 			if (orig != data)
7807 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7808 
7809 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7810 			data &= ~LS2_EXIT_TIME_MASK;
7811 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7812 				data |= LS2_EXIT_TIME(5);
7813 			if (orig != data)
7814 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7815 
7816 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7817 			data &= ~LS2_EXIT_TIME_MASK;
7818 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7819 				data |= LS2_EXIT_TIME(5);
7820 			if (orig != data)
7821 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7822 
7823 #ifdef zMN_TODO
7824 			if (!disable_clkreq &&
7825 			    !pci_is_root_bus(rdev->pdev->bus)) {
7826 				struct pci_dev *root = rdev->pdev->bus->self;
7827 				u32 lnkcap;
7828 
7829 				clk_req_support = false;
7830 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7831 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7832 					clk_req_support = true;
7833 			} else {
7834 				clk_req_support = false;
7835 			}
7836 #else
7837 			clk_req_support = false;
7838 #endif
7839 
7840 			if (clk_req_support) {
7841 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7842 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7843 				if (orig != data)
7844 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7845 
7846 				orig = data = RREG32(THM_CLK_CNTL);
7847 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7848 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7849 				if (orig != data)
7850 					WREG32(THM_CLK_CNTL, data);
7851 
7852 				orig = data = RREG32(MISC_CLK_CNTL);
7853 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7854 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7855 				if (orig != data)
7856 					WREG32(MISC_CLK_CNTL, data);
7857 
7858 				orig = data = RREG32(CG_CLKPIN_CNTL);
7859 				data &= ~BCLK_AS_XCLK;
7860 				if (orig != data)
7861 					WREG32(CG_CLKPIN_CNTL, data);
7862 
7863 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7864 				data &= ~FORCE_BIF_REFCLK_EN;
7865 				if (orig != data)
7866 					WREG32(CG_CLKPIN_CNTL_2, data);
7867 
7868 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7869 				data &= ~MPLL_CLKOUT_SEL_MASK;
7870 				data |= MPLL_CLKOUT_SEL(4);
7871 				if (orig != data)
7872 					WREG32(MPLL_BYPASSCLK_SEL, data);
7873 
7874 				orig = data = RREG32(SPLL_CNTL_MODE);
7875 				data &= ~SPLL_REFCLK_SEL_MASK;
7876 				if (orig != data)
7877 					WREG32(SPLL_CNTL_MODE, data);
7878 			}
7879 		}
7880 	} else {
7881 		if (orig != data)
7882 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7883 	}
7884 
7885 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7886 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7887 	if (orig != data)
7888 		WREG32_PCIE(PCIE_CNTL2, data);
7889 
7890 	if (!disable_l0s) {
7891 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7892 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7893 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7894 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7895 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7896 				data &= ~LC_L0S_INACTIVITY_MASK;
7897 				if (orig != data)
7898 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7899 			}
7900 		}
7901 	}
7902 }
7903 
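/**
 * si_vce_send_vcepll_ctlreq - handshake a VCEPLL register update
 *
 * @rdev: radeon_device pointer
 *
 * Toggles the CTLREQ bit and waits for both CTLACK bits
 * to be asserted (SI).
 * Returns 0 for success, -ETIMEDOUT on timeout.
 */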
7904 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7905 {
7906 	unsigned i;
7907 
7908 	/* make sure VCEPLL_CTLREQ is deasserted */
7909 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7910 
7911 	mdelay(10);
7912 
7913 	/* assert UPLL_CTLREQ */
7914 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7915 
7916 	/* wait for CTLACK and CTLACK2 to get asserted */
7917 	for (i = 0; i < 100; ++i) {
7918 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7919 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7920 			break;
7921 		mdelay(10);
7922 	}
7923 
7924 	/* deassert UPLL_CTLREQ */
7925 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7926 
7927 	if (i == 100) {
7928 		DRM_ERROR("Timeout setting VCE clocks!\n");
7929 		return -ETIMEDOUT;
7930 	}
7931 
7932 	return 0;
7933 }
7934 
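/**
 * si_set_vce_clocks - program the VCE PLL
 *
 * @rdev: radeon_device pointer
 * @evclk: requested VCE video clock
 * @ecclk: requested VCE engine clock
 *
 * Programs the VCEPLL dividers for the requested clocks (SI).
 * If either clock is zero, the PLL is left in bypass mode and
 * put to sleep.
 * Returns 0 for success, error for failure.
 */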
7935 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7936 {
7937 	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7938 	int r;
7939 
7940 	/* bypass evclk and ecclk with bclk */
7941 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7942 		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7943 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7944 
7945 	/* put PLL in bypass mode */
7946 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7947 		     ~VCEPLL_BYPASS_EN_MASK);
7948 
7949 	if (!evclk || !ecclk) {
7950 		/* keep the Bypass mode, put PLL to sleep */
7951 		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7952 			     ~VCEPLL_SLEEP_MASK);
7953 		return 0;
7954 	}
7955 
7956 	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7957 					  16384, 0x03FFFFFF, 0, 128, 5,
7958 					  &fb_div, &evclk_div, &ecclk_div);
7959 	if (r)
7960 		return r;
7961 
7962 	/* set RESET_ANTI_MUX to 0 */
7963 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7964 
7965 	/* set VCO_MODE to 1 */
7966 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7967 		     ~VCEPLL_VCO_MODE_MASK);
7968 
7969 	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7970 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7971 		     ~VCEPLL_SLEEP_MASK);
7972 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7973 
7974 	/* deassert VCEPLL_RESET */
7975 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7976 
7977 	mdelay(1);
7978 
7979 	r = si_vce_send_vcepll_ctlreq(rdev);
7980 	if (r)
7981 		return r;
7982 
7983 	/* assert VCEPLL_RESET again */
7984 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7985 
7986 	/* disable spread spectrum. */
7987 	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7988 
7989 	/* set feedback divider */
7990 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7991 
7992 	/* set ref divider to 0 */
7993 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7994 
7995 	/* set PDIV_A and PDIV_B */
7996 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7997 		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7998 		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7999 
8000 	/* give the PLL some time to settle */
8001 	mdelay(15);
8002 
8003 	/* deassert PLL_RESET */
8004 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
8005 
8006 	mdelay(15);
8007 
8008 	/* switch from bypass mode to normal mode */
8009 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
8010 
8011 	r = si_vce_send_vcepll_ctlreq(rdev);
8012 	if (r)
8013 		return r;
8014 
8015 	/* switch EVCLK and ECCLK selection */
8016 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
8017 		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
8018 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
8019 
8020 	mdelay(100);
8021 
8022 	return 0;
8023 }
8024