xref: /dragonfly/sys/dev/drm/radeon/si.c (revision 40657594)
/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include <drm/radeon_drm.h>
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"
#include "clearstate_si.h"
#include "radeon_ucode.h"


MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");

MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
MODULE_FIRMWARE("radeon/tahiti_me.bin");
MODULE_FIRMWARE("radeon/tahiti_ce.bin");
MODULE_FIRMWARE("radeon/tahiti_mc.bin");
MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
MODULE_FIRMWARE("radeon/tahiti_smc.bin");
MODULE_FIRMWARE("radeon/tahiti_k_smc.bin");

MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");

MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
MODULE_FIRMWARE("radeon/pitcairn_me.bin");
MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");

MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");

MODULE_FIRMWARE("radeon/verde_pfp.bin");
MODULE_FIRMWARE("radeon/verde_me.bin");
MODULE_FIRMWARE("radeon/verde_ce.bin");
MODULE_FIRMWARE("radeon/verde_mc.bin");
MODULE_FIRMWARE("radeon/verde_rlc.bin");
MODULE_FIRMWARE("radeon/verde_smc.bin");
MODULE_FIRMWARE("radeon/verde_k_smc.bin");

MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");

MODULE_FIRMWARE("radeon/oland_pfp.bin");
MODULE_FIRMWARE("radeon/oland_me.bin");
MODULE_FIRMWARE("radeon/oland_ce.bin");
MODULE_FIRMWARE("radeon/oland_mc.bin");
MODULE_FIRMWARE("radeon/oland_rlc.bin");
MODULE_FIRMWARE("radeon/oland_smc.bin");
MODULE_FIRMWARE("radeon/oland_k_smc.bin");

MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

MODULE_FIRMWARE("radeon/hainan_pfp.bin");
MODULE_FIRMWARE("radeon/hainan_me.bin");
MODULE_FIRMWARE("radeon/hainan_ce.bin");
MODULE_FIRMWARE("radeon/hainan_mc.bin");
MODULE_FIRMWARE("radeon/hainan_rlc.bin");
MODULE_FIRMWARE("radeon/hainan_smc.bin");
MODULE_FIRMWARE("radeon/hainan_k_smc.bin");

static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);

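/*
 * RLC save/restore register list for Verde.  Entries are mostly pairs:
 * a packed word ((GRBM select << 16) | (register byte offset >> 2))
 * followed by an initial value, with a few bare control words mixed in.
 */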
static const u32 verde_rlc_save_restore_register_list[] =
{
	(0x8000 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x98f0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xe7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9150 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x897c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8d8c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac54 >> 2),
	0x00000000,
	0x3,
	(0x9c00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9910 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9914 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9918 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x991c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9920 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9924 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9928 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x992c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9930 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9934 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9938 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x993c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9940 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9944 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9948 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x994c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9950 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9954 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9958 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x995c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9960 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9964 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9968 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x996c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9970 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9974 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9978 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x997c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9980 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9984 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9988 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x998c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c08 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9060 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9364 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x913c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e50 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e58 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e5c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9508 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x950c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9494 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88cc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x89b0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9830 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9838 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9a10 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9874 >> 2),
	0x00000000,
	0x00000000
};

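/*
 * The "golden register" tables below are {register offset, and_mask,
 * or_mask} triplets consumed by radeon_program_register_sequence():
 * the masked bits of the current register value are cleared and or_mask
 * is written on top (an and_mask of 0xffffffff stores or_mask verbatim).
 */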
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};

static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};

static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};

static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};

static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};

static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};

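/*
 * Medium- and coarse-grain clock gating (MGCG/CGCG) defaults, in the
 * same {offset, and_mask, or_mask} triplet format as above.
 */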
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

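/*
 * Power gating init sequence for Verde, again as {offset, and_mask,
 * or_mask} triplets.
 */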
static u32 verde_pg_init[] =
{
	0x353c, 0xffffffff, 0x40000,
	0x3538, 0xffffffff, 0x200010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x7007,
	0x3538, 0xffffffff, 0x300010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x400000,
	0x3538, 0xffffffff, 0x100010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x120200,
	0x3538, 0xffffffff, 0x500010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x1e1e16,
	0x3538, 0xffffffff, 0x600010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x171f1e,
	0x3538, 0xffffffff, 0x700010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x3538, 0xffffffff, 0x9ff,
	0x3500, 0xffffffff, 0x0,
	0x3504, 0xffffffff, 0x10000800,
	0x3504, 0xffffffff, 0xf,
	0x3504, 0xffffffff, 0xf,
	0x3500, 0xffffffff, 0x4,
	0x3504, 0xffffffff, 0x1000051e,
	0x3504, 0xffffffff, 0xffff,
	0x3504, 0xffffffff, 0xffff,
	0x3500, 0xffffffff, 0x8,
	0x3504, 0xffffffff, 0x80500,
	0x3500, 0xffffffff, 0x12,
	0x3504, 0xffffffff, 0x9050c,
	0x3500, 0xffffffff, 0x1d,
	0x3504, 0xffffffff, 0xb052c,
	0x3500, 0xffffffff, 0x2a,
	0x3504, 0xffffffff, 0x1053e,
	0x3500, 0xffffffff, 0x2d,
	0x3504, 0xffffffff, 0x10546,
	0x3500, 0xffffffff, 0x30,
	0x3504, 0xffffffff, 0xa054e,
	0x3500, 0xffffffff, 0x3c,
	0x3504, 0xffffffff, 0x1055f,
	0x3500, 0xffffffff, 0x3f,
	0x3504, 0xffffffff, 0x10567,
	0x3500, 0xffffffff, 0x42,
	0x3504, 0xffffffff, 0x1056f,
	0x3500, 0xffffffff, 0x45,
	0x3504, 0xffffffff, 0x10572,
	0x3500, 0xffffffff, 0x48,
	0x3504, 0xffffffff, 0x20575,
	0x3500, 0xffffffff, 0x4c,
	0x3504, 0xffffffff, 0x190801,
	0x3500, 0xffffffff, 0x67,
	0x3504, 0xffffffff, 0x1082a,
	0x3500, 0xffffffff, 0x6a,
	0x3504, 0xffffffff, 0x1b082d,
	0x3500, 0xffffffff, 0x87,
	0x3504, 0xffffffff, 0x310851,
	0x3500, 0xffffffff, 0xba,
	0x3504, 0xffffffff, 0x891,
	0x3500, 0xffffffff, 0xbc,
	0x3504, 0xffffffff, 0x893,
	0x3500, 0xffffffff, 0xbe,
	0x3504, 0xffffffff, 0x20895,
	0x3500, 0xffffffff, 0xc2,
	0x3504, 0xffffffff, 0x20899,
	0x3500, 0xffffffff, 0xc6,
	0x3504, 0xffffffff, 0x2089d,
	0x3500, 0xffffffff, 0xca,
	0x3504, 0xffffffff, 0x8a1,
	0x3500, 0xffffffff, 0xcc,
	0x3504, 0xffffffff, 0x8a3,
	0x3500, 0xffffffff, 0xce,
	0x3504, 0xffffffff, 0x308a5,
	0x3500, 0xffffffff, 0xd3,
	0x3504, 0xffffffff, 0x6d08cd,
	0x3500, 0xffffffff, 0x142,
	0x3504, 0xffffffff, 0x2000095a,
	0x3504, 0xffffffff, 0x1,
	0x3500, 0xffffffff, 0x144,
	0x3504, 0xffffffff, 0x301f095b,
	0x3500, 0xffffffff, 0x165,
	0x3504, 0xffffffff, 0xc094d,
	0x3500, 0xffffffff, 0x173,
	0x3504, 0xffffffff, 0xf096d,
	0x3500, 0xffffffff, 0x184,
	0x3504, 0xffffffff, 0x15097f,
	0x3500, 0xffffffff, 0x19b,
	0x3504, 0xffffffff, 0xc0998,
	0x3500, 0xffffffff, 0x1a9,
	0x3504, 0xffffffff, 0x409a7,
	0x3500, 0xffffffff, 0x1af,
	0x3504, 0xffffffff, 0xcdc,
	0x3500, 0xffffffff, 0x1b1,
	0x3504, 0xffffffff, 0x800,
	0x3508, 0xffffffff, 0x6c9b2000,
	0x3510, 0xfc00, 0x2000,
	0x3544, 0xffffffff, 0xfc0,
	0x28d4, 0x00000100, 0x100
};

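/**
 * si_init_golden_registers - program "golden" register settings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the per-ASIC golden register, RLC, clock gating and (for
 * Verde) power gating sequences for the detected SI part.
 */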
static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}

/**
 * si_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 *
 */
int si_get_allowed_info_register(struct radeon_device *rdev,
				 u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
	case UVD_STATUS:
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

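/*
 * PCIe reference clock constants used by si_get_xclk(); presumably in
 * the same 10 kHz units as rdev->clock.spll.reference_freq.
 */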
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)

/**
 * si_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (SI).
 */
u32 si_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;
	u32 tmp;

	tmp = RREG32(CG_CLKPIN_CNTL_2);
	if (tmp & MUX_TCLK_TO_XCLK)
		return TCLK;

	tmp = RREG32(CG_CLKPIN_CNTL);
	if (tmp & XTALIN_DIVIDE)
		return reference_clock / 4;

	return reference_clock;
}

/* get temperature in millidegrees */
int si_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

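	/* a reading with bit 9 set is out of range; report the 255 C max */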
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = (actual_temp * 1000);

	return actual_temp;
}

#define TAHITI_IO_MC_REGS_SIZE 36

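/*
 * MC (memory controller) I/O register tables: each row is an
 * {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pair written while the
 * MC sequencer is held in reset (see si_mc_load_microcode()).
 */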
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};

static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};

static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};

static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};

static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};

/**
 * si_mc_load_microcode - load MC firmware into the hardware
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode and I/O debug register settings into the hw
 * (SI).  Returns 0 on success, error on failure.
 */
int si_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		ucode_size = rdev->mc_fw->datasize / 4;

		switch (rdev->family) {
		case CHIP_TAHITI:
			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_PITCAIRN:
			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_VERDE:
		default:
			io_mc_regs = (u32 *)&verde_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_OLAND:
			io_mc_regs = (u32 *)&oland_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAINAN:
			io_mc_regs = (u32 *)&hainan_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}
		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}

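/*
 * si_init_microcode - fetch the PFP, ME, CE, RLC, MC and SMC firmware
 * images for the detected chip.  The newer lower-case firmware names
 * are tried first; on failure the legacy upper-case images (with
 * per-block size validation) are used as a fallback.
 */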
1633 static int si_init_microcode(struct radeon_device *rdev)
1634 {
1635 	const char *chip_name;
1636 	const char *new_chip_name;
1637 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1638 	size_t smc_req_size, mc2_req_size;
1639 	char fw_name[30];
1640 	int err;
1641 	int new_fw = 0;
1642 	bool new_smc = false;
1643 
1644 	DRM_DEBUG("\n");
1645 
1646 	switch (rdev->family) {
1647 	case CHIP_TAHITI:
1648 		chip_name = "TAHITI";
1649 		/* XXX: figure out which Tahitis need the new ucode */
1650 		if (0)
1651 			new_smc = true;
1652 		new_chip_name = "tahiti";
1653 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1654 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1655 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1656 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1657 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1658 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1659 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1660 		break;
1661 	case CHIP_PITCAIRN:
1662 		chip_name = "PITCAIRN";
1663 		if ((rdev->pdev->revision == 0x81) ||
1664 		    (rdev->pdev->device == 0x6810) ||
1665 		    (rdev->pdev->device == 0x6811) ||
1666 		    (rdev->pdev->device == 0x6816) ||
1667 		    (rdev->pdev->device == 0x6817) ||
1668 		    (rdev->pdev->device == 0x6806))
1669 			new_smc = true;
1670 		new_chip_name = "pitcairn";
1671 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1672 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1673 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1674 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1675 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1676 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1677 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1678 		break;
1679 	case CHIP_VERDE:
1680 		chip_name = "VERDE";
1681 		if ((rdev->pdev->revision == 0x81) ||
1682 		    (rdev->pdev->revision == 0x83) ||
1683 		    (rdev->pdev->revision == 0x87) ||
1684 		    (rdev->pdev->device == 0x6820) ||
1685 		    (rdev->pdev->device == 0x6821) ||
1686 		    (rdev->pdev->device == 0x6822) ||
1687 		    (rdev->pdev->device == 0x6823) ||
1688 		    (rdev->pdev->device == 0x682A) ||
1689 		    (rdev->pdev->device == 0x682B))
1690 			new_smc = true;
1691 		new_chip_name = "verde";
1692 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1693 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1694 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1695 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1696 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1697 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1698 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1699 		break;
1700 	case CHIP_OLAND:
1701 		chip_name = "OLAND";
1702 		if ((rdev->pdev->revision == 0xC7) ||
1703 		    (rdev->pdev->revision == 0x80) ||
1704 		    (rdev->pdev->revision == 0x81) ||
1705 		    (rdev->pdev->revision == 0x83) ||
1706 		    (rdev->pdev->device == 0x6604) ||
1707 		    (rdev->pdev->device == 0x6605))
1708 			new_smc = true;
1709 		new_chip_name = "oland";
1710 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1711 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1712 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1713 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1714 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1715 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1716 		break;
1717 	case CHIP_HAINAN:
1718 		chip_name = "HAINAN";
1719 		if ((rdev->pdev->revision == 0x81) ||
1720 		    (rdev->pdev->revision == 0x83) ||
1721 		    (rdev->pdev->revision == 0xC3) ||
1722 		    (rdev->pdev->device == 0x6664) ||
1723 		    (rdev->pdev->device == 0x6665) ||
1724 		    (rdev->pdev->device == 0x6667))
1725 			new_smc = true;
1726 		new_chip_name = "hainan";
1727 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1728 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1729 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1730 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1731 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1732 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1733 		break;
1734 	default: BUG();
1735 	}
1736 
1737 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1738 
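	/* try the new-style image first; fall back to the legacy image,
	 * which is checked by length only */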
1739 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
1740 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1741 	if (err) {
1742 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1743 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1744 		if (err)
1745 			goto out;
1746 		if (rdev->pfp_fw->datasize != pfp_req_size) {
1747 			printk(KERN_ERR
1748 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1749 			       rdev->pfp_fw->datasize, fw_name);
1750 			err = -EINVAL;
1751 			goto out;
1752 		}
1753 	} else {
1754 		err = radeon_ucode_validate(rdev->pfp_fw);
1755 		if (err) {
1756 			printk(KERN_ERR
1757 			       "si_cp: validation failed for firmware \"%s\"\n",
1758 			       fw_name);
1759 			goto out;
1760 		} else {
1761 			new_fw++;
1762 		}
1763 	}
1764 
1765 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
1766 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1767 	if (err) {
1768 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
1769 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1770 		if (err)
1771 			goto out;
1772 		if (rdev->me_fw->datasize != me_req_size) {
1773 			printk(KERN_ERR
1774 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1775 			       rdev->me_fw->datasize, fw_name);
1776 			err = -EINVAL;
1777 		}
1778 	} else {
1779 		err = radeon_ucode_validate(rdev->me_fw);
1780 		if (err) {
1781 			printk(KERN_ERR
1782 			       "si_cp: validation failed for firmware \"%s\"\n",
1783 			       fw_name);
1784 			goto out;
1785 		} else {
1786 			new_fw++;
1787 		}
1788 	}
1789 
1790 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
1791 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1792 	if (err) {
1793 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
1794 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1795 		if (err)
1796 			goto out;
1797 		if (rdev->ce_fw->datasize != ce_req_size) {
1798 			printk(KERN_ERR
1799 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1800 			       rdev->ce_fw->datasize, fw_name);
1801 			err = -EINVAL;
1802 		}
1803 	} else {
1804 		err = radeon_ucode_validate(rdev->ce_fw);
1805 		if (err) {
1806 			printk(KERN_ERR
1807 			       "si_cp: validation failed for firmware \"%s\"\n",
1808 			       fw_name);
1809 			goto out;
1810 		} else {
1811 			new_fw++;
1812 		}
1813 	}
1814 
1815 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
1816 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1817 	if (err) {
1818 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
1819 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1820 		if (err)
1821 			goto out;
1822 		if (rdev->rlc_fw->datasize != rlc_req_size) {
1823 			printk(KERN_ERR
1824 			       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1825 			       rdev->rlc_fw->datasize, fw_name);
1826 			err = -EINVAL;
1827 		}
1828 	} else {
1829 		err = radeon_ucode_validate(rdev->rlc_fw);
1830 		if (err) {
1831 			printk(KERN_ERR
1832 			       "si_cp: validation failed for firmware \"%s\"\n",
1833 			       fw_name);
1834 			goto out;
1835 		} else {
1836 			new_fw++;
1837 		}
1838 	}
1839 
1840 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
1841 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1842 	if (err) {
1843 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
1844 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1845 		if (err) {
1846 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
1847 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1848 			if (err)
1849 				goto out;
1850 		}
1851 		if ((rdev->mc_fw->datasize != mc_req_size) &&
1852 		    (rdev->mc_fw->datasize != mc2_req_size)) {
1853 			printk(KERN_ERR
1854 			       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1855 			       rdev->mc_fw->datasize, fw_name);
1856 			err = -EINVAL;
1857 		}
1858 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
1859 	} else {
1860 		err = radeon_ucode_validate(rdev->mc_fw);
1861 		if (err) {
1862 			printk(KERN_ERR
1863 			       "si_cp: validation failed for firmware \"%s\"\n",
1864 			       fw_name);
1865 			goto out;
1866 		} else {
1867 			new_fw++;
1868 		}
1869 	}
1870 
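	/* some board/revision combinations need the newer _k SMC image */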
1871 	if (new_smc)
1872 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_k_smc", new_chip_name);
1873 	else
1874 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
1875 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1876 	if (err) {
1877 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
1878 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1879 		if (err) {
1880 			printk(KERN_ERR
1881 			       "smc: error loading firmware \"%s\"\n",
1882 			       fw_name);
1883 			release_firmware(rdev->smc_fw);
1884 			rdev->smc_fw = NULL;
1885 			err = 0;
1886 		} else if (rdev->smc_fw->datasize != smc_req_size) {
1887 			printk(KERN_ERR
1888 			       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1889 			       rdev->smc_fw->datasize, fw_name);
1890 			err = -EINVAL;
1891 		}
1892 	} else {
1893 		err = radeon_ucode_validate(rdev->smc_fw);
1894 		if (err) {
1895 			printk(KERN_ERR
1896 			       "si_cp: validation failed for firmware \"%s\"\n",
1897 			       fw_name);
1898 			goto out;
1899 		} else {
1900 			new_fw++;
1901 		}
1902 	}
1903 
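	/* all six images must be new-style or none of them, since
	 * rdev->new_fw selects the parsing path for every image */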
1904 	if (new_fw == 0) {
1905 		rdev->new_fw = false;
1906 	} else if (new_fw < 6) {
1907 		printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1908 		err = -EINVAL;
1909 	} else {
1910 		rdev->new_fw = true;
1911 	}
1912 out:
1913 	if (err) {
1914 		if (err != -EINVAL)
1915 			printk(KERN_ERR
1916 			       "si_cp: Failed to load firmware \"%s\"\n",
1917 			       fw_name);
1918 		release_firmware(rdev->pfp_fw);
1919 		rdev->pfp_fw = NULL;
1920 		release_firmware(rdev->me_fw);
1921 		rdev->me_fw = NULL;
1922 		release_firmware(rdev->ce_fw);
1923 		rdev->ce_fw = NULL;
1924 		release_firmware(rdev->rlc_fw);
1925 		rdev->rlc_fw = NULL;
1926 		release_firmware(rdev->mc_fw);
1927 		rdev->mc_fw = NULL;
1928 		release_firmware(rdev->smc_fw);
1929 		rdev->smc_fw = NULL;
1930 	}
1931 	return err;
1932 }
1933 
1934 /**
1935  * si_fini_microcode - drop the firmware image references
1936  *
1937  * @rdev: radeon_device pointer
1938  *
1939  * Drop the pfp, me, rlc, mc, ce and smc firmware image references.
1940  * Called at driver shutdown.
1941  */
1942 static void si_fini_microcode(struct radeon_device *rdev)
1943 {
1944 	release_firmware(rdev->pfp_fw);
1945 	rdev->pfp_fw = NULL;
1946 	release_firmware(rdev->me_fw);
1947 	rdev->me_fw = NULL;
1948 	release_firmware(rdev->rlc_fw);
1949 	rdev->rlc_fw = NULL;
1950 	release_firmware(rdev->mc_fw);
1951 	rdev->mc_fw = NULL;
1952 	release_firmware(rdev->smc_fw);
1953 	rdev->smc_fw = NULL;
1954 	release_firmware(rdev->ce_fw);
1955 	rdev->ce_fw = NULL;
1956 }
1957 
1958 /* watermark setup */
1959 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1960 				   struct radeon_crtc *radeon_crtc,
1961 				   struct drm_display_mode *mode,
1962 				   struct drm_display_mode *other_mode)
1963 {
1964 	u32 tmp, buffer_alloc, i;
1965 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1966 	/*
1967 	 * Line Buffer Setup
1968 	 * There are 3 line buffers, each one shared by 2 display controllers.
1969 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1970 	 * the display controllers.  The partitioning is done via one of four
1971 	 * preset allocations specified in bits 21:20:
1972 	 *  0 - half lb
1973 	 *  2 - whole lb, other crtc must be disabled
1974 	 */
1975 	/* this can get tricky if we have two large displays on a paired group
1976 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1977 	 * non-linked crtcs for maximum line buffer allocation.
1978 	 */
1979 	if (radeon_crtc->base.enabled && mode) {
1980 		if (other_mode) {
1981 			tmp = 0; /* 1/2 */
1982 			buffer_alloc = 1;
1983 		} else {
1984 			tmp = 2; /* whole */
1985 			buffer_alloc = 2;
1986 		}
1987 	} else {
1988 		tmp = 0;
1989 		buffer_alloc = 0;
1990 	}
1991 
1992 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1993 	       DC_LB_MEMORY_CONFIG(tmp));
1994 
1995 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1996 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1997 	for (i = 0; i < rdev->usec_timeout; i++) {
1998 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1999 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
2000 			break;
2001 		udelay(1);
2002 	}
2003 
2004 	if (radeon_crtc->base.enabled && mode) {
2005 		switch (tmp) {
2006 		case 0:
2007 		default:
2008 			return 4096 * 2;
2009 		case 2:
2010 			return 8192 * 2;
2011 		}
2012 	}
2013 
2014 	/* controller not enabled, so no lb used */
2015 	return 0;
2016 }
2017 
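/* decode the NOOFCHAN field of MC_SHARED_CHMAP into a DRAM channel count */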
2018 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2019 {
2020 	u32 tmp = RREG32(MC_SHARED_CHMAP);
2021 
2022 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2023 	case 0:
2024 	default:
2025 		return 1;
2026 	case 1:
2027 		return 2;
2028 	case 2:
2029 		return 4;
2030 	case 3:
2031 		return 8;
2032 	case 4:
2033 		return 3;
2034 	case 5:
2035 		return 6;
2036 	case 6:
2037 		return 10;
2038 	case 7:
2039 		return 12;
2040 	case 8:
2041 		return 16;
2042 	}
2043 }
2044 
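/* input parameters for the DCE6 display watermark calculations */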
2045 struct dce6_wm_params {
2046 	u32 dram_channels; /* number of dram channels */
2047 	u32 yclk;          /* bandwidth per dram data pin in kHz */
2048 	u32 sclk;          /* engine clock in kHz */
2049 	u32 disp_clk;      /* display clock in kHz */
2050 	u32 src_width;     /* viewport width */
2051 	u32 active_time;   /* active display time in ns */
2052 	u32 blank_time;    /* blank time in ns */
2053 	bool interlaced;    /* mode is interlaced */
2054 	fixed20_12 vsc;    /* vertical scale ratio */
2055 	u32 num_heads;     /* number of active crtcs */
2056 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2057 	u32 lb_size;       /* line buffer allocated to pipe */
2058 	u32 vtaps;         /* vertical scaler taps */
2059 };
2060 
2061 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2062 {
2063 	/* Calculate raw DRAM Bandwidth */
2064 	fixed20_12 dram_efficiency; /* 0.7 */
2065 	fixed20_12 yclk, dram_channels, bandwidth;
2066 	fixed20_12 a;
2067 
2068 	a.full = dfixed_const(1000);
2069 	yclk.full = dfixed_const(wm->yclk);
2070 	yclk.full = dfixed_div(yclk, a);
2071 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2072 	a.full = dfixed_const(10);
2073 	dram_efficiency.full = dfixed_const(7);
2074 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2075 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2076 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2077 
2078 	return dfixed_trunc(bandwidth);
2079 }
2080 
2081 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2082 {
2083 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2084 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2085 	fixed20_12 yclk, dram_channels, bandwidth;
2086 	fixed20_12 a;
2087 
2088 	a.full = dfixed_const(1000);
2089 	yclk.full = dfixed_const(wm->yclk);
2090 	yclk.full = dfixed_div(yclk, a);
2091 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2092 	a.full = dfixed_const(10);
2093 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
2094 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2095 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2096 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2097 
2098 	return dfixed_trunc(bandwidth);
2099 }
2100 
2101 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2102 {
2103 	/* Calculate the display Data return Bandwidth */
2104 	fixed20_12 return_efficiency; /* 0.8 */
2105 	fixed20_12 sclk, bandwidth;
2106 	fixed20_12 a;
2107 
2108 	a.full = dfixed_const(1000);
2109 	sclk.full = dfixed_const(wm->sclk);
2110 	sclk.full = dfixed_div(sclk, a);
2111 	a.full = dfixed_const(10);
2112 	return_efficiency.full = dfixed_const(8);
2113 	return_efficiency.full = dfixed_div(return_efficiency, a);
2114 	a.full = dfixed_const(32);
2115 	bandwidth.full = dfixed_mul(a, sclk);
2116 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2117 
2118 	return dfixed_trunc(bandwidth);
2119 }
2120 
2121 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2122 {
2123 	return 32;
2124 }
2125 
2126 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2127 {
2128 	/* Calculate the DMIF Request Bandwidth */
2129 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2130 	fixed20_12 disp_clk, sclk, bandwidth;
2131 	fixed20_12 a, b1, b2;
2132 	u32 min_bandwidth;
2133 
2134 	a.full = dfixed_const(1000);
2135 	disp_clk.full = dfixed_const(wm->disp_clk);
2136 	disp_clk.full = dfixed_div(disp_clk, a);
2137 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2138 	b1.full = dfixed_mul(a, disp_clk);
2139 
2140 	a.full = dfixed_const(1000);
2141 	sclk.full = dfixed_const(wm->sclk);
2142 	sclk.full = dfixed_div(sclk, a);
2143 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2144 	b2.full = dfixed_mul(a, sclk);
2145 
2146 	a.full = dfixed_const(10);
2147 	disp_clk_request_efficiency.full = dfixed_const(8);
2148 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2149 
2150 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2151 
2152 	a.full = dfixed_const(min_bandwidth);
2153 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2154 
2155 	return dfixed_trunc(bandwidth);
2156 }
2157 
2158 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2159 {
2160 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
2161 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2162 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2163 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2164 
2165 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2166 }
2167 
2168 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2169 {
2170 	/* Calculate the display mode Average Bandwidth.
2171 	 * DisplayMode should contain the source and destination dimensions,
2172 	 * timing, etc.
2173 	 */
2174 	fixed20_12 bpp;
2175 	fixed20_12 line_time;
2176 	fixed20_12 src_width;
2177 	fixed20_12 bandwidth;
2178 	fixed20_12 a;
2179 
2180 	a.full = dfixed_const(1000);
2181 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2182 	line_time.full = dfixed_div(line_time, a);
2183 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2184 	src_width.full = dfixed_const(wm->src_width);
2185 	bandwidth.full = dfixed_mul(src_width, bpp);
2186 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2187 	bandwidth.full = dfixed_div(bandwidth, line_time);
2188 
2189 	return dfixed_trunc(bandwidth);
2190 }
2191 
2192 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2193 {
2194 	/* First calculate the latency in ns */
2195 	u32 mc_latency = 2000; /* 2000 ns. */
2196 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2197 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2198 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2199 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2200 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2201 		(wm->num_heads * cursor_line_pair_return_time);
2202 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2203 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2204 	u32 tmp, dmif_size = 12288;
2205 	fixed20_12 a, b, c;
2206 
2207 	if (wm->num_heads == 0)
2208 		return 0;
2209 
2210 	a.full = dfixed_const(2);
2211 	b.full = dfixed_const(1);
2212 	if ((wm->vsc.full > a.full) ||
2213 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2214 	    (wm->vtaps >= 5) ||
2215 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2216 		max_src_lines_per_dst_line = 4;
2217 	else
2218 		max_src_lines_per_dst_line = 2;
2219 
2220 	a.full = dfixed_const(available_bandwidth);
2221 	b.full = dfixed_const(wm->num_heads);
2222 	a.full = dfixed_div(a, b);
2223 
2224 	b.full = dfixed_const(mc_latency + 512);
2225 	c.full = dfixed_const(wm->disp_clk);
2226 	b.full = dfixed_div(b, c);
2227 
2228 	c.full = dfixed_const(dmif_size);
2229 	b.full = dfixed_div(c, b);
2230 
2231 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
2232 
2233 	b.full = dfixed_const(1000);
2234 	c.full = dfixed_const(wm->disp_clk);
2235 	b.full = dfixed_div(c, b);
2236 	c.full = dfixed_const(wm->bytes_per_pixel);
2237 	b.full = dfixed_mul(b, c);
2238 
2239 	lb_fill_bw = min(tmp, dfixed_trunc(b));
2240 
2241 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2242 	b.full = dfixed_const(1000);
2243 	c.full = dfixed_const(lb_fill_bw);
2244 	b.full = dfixed_div(c, b);
2245 	a.full = dfixed_div(a, b);
2246 	line_fill_time = dfixed_trunc(a);
2247 
2248 	if (line_fill_time < wm->active_time)
2249 		return latency;
2250 	else
2251 		return latency + (line_fill_time - wm->active_time);
2252 
2253 }
2254 
2255 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2256 {
2257 	if (dce6_average_bandwidth(wm) <=
2258 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2259 		return true;
2260 	else
2261 		return false;
2262 }
2263 
2264 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2265 {
2266 	if (dce6_average_bandwidth(wm) <=
2267 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2268 		return true;
2269 	else
2270 		return false;
2271 }
2272 
2273 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2274 {
2275 	u32 lb_partitions = wm->lb_size / wm->src_width;
2276 	u32 line_time = wm->active_time + wm->blank_time;
2277 	u32 latency_tolerant_lines;
2278 	u32 latency_hiding;
2279 	fixed20_12 a;
2280 
2281 	a.full = dfixed_const(1);
2282 	if (wm->vsc.full > a.full)
2283 		latency_tolerant_lines = 1;
2284 	else {
2285 		if (lb_partitions <= (wm->vtaps + 1))
2286 			latency_tolerant_lines = 1;
2287 		else
2288 			latency_tolerant_lines = 2;
2289 	}
2290 
2291 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2292 
2293 	if (dce6_latency_watermark(wm) <= latency_hiding)
2294 		return true;
2295 	else
2296 		return false;
2297 }
2298 
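/* compute the latency watermarks for the high and low dpm clock levels
 * and program them, along with the arbitration priority marks, for one crtc
 */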
2299 static void dce6_program_watermarks(struct radeon_device *rdev,
2300 					 struct radeon_crtc *radeon_crtc,
2301 					 u32 lb_size, u32 num_heads)
2302 {
2303 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2304 	struct dce6_wm_params wm_low, wm_high;
2305 	u32 dram_channels;
2306 	u32 pixel_period;
2307 	u32 line_time = 0;
2308 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2309 	u32 priority_a_mark = 0, priority_b_mark = 0;
2310 	u32 priority_a_cnt = PRIORITY_OFF;
2311 	u32 priority_b_cnt = PRIORITY_OFF;
2312 	u32 tmp, arb_control3;
2313 	fixed20_12 a, b, c;
2314 
2315 	if (radeon_crtc->base.enabled && num_heads && mode) {
2316 		pixel_period = 1000000 / (u32)mode->clock;
2317 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2318 		priority_a_cnt = 0;
2319 		priority_b_cnt = 0;
2320 
2321 		if (rdev->family == CHIP_ARUBA)
2322 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2323 		else
2324 			dram_channels = si_get_number_of_dram_channels(rdev);
2325 
2326 		/* watermark for high clocks */
2327 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2328 			wm_high.yclk =
2329 				radeon_dpm_get_mclk(rdev, false) * 10;
2330 			wm_high.sclk =
2331 				radeon_dpm_get_sclk(rdev, false) * 10;
2332 		} else {
2333 			wm_high.yclk = rdev->pm.current_mclk * 10;
2334 			wm_high.sclk = rdev->pm.current_sclk * 10;
2335 		}
2336 
2337 		wm_high.disp_clk = mode->clock;
2338 		wm_high.src_width = mode->crtc_hdisplay;
2339 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2340 		wm_high.blank_time = line_time - wm_high.active_time;
2341 		wm_high.interlaced = false;
2342 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2343 			wm_high.interlaced = true;
2344 		wm_high.vsc = radeon_crtc->vsc;
2345 		wm_high.vtaps = 1;
2346 		if (radeon_crtc->rmx_type != RMX_OFF)
2347 			wm_high.vtaps = 2;
2348 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2349 		wm_high.lb_size = lb_size;
2350 		wm_high.dram_channels = dram_channels;
2351 		wm_high.num_heads = num_heads;
2352 
2353 		/* watermark for low clocks */
2354 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2355 			wm_low.yclk =
2356 				radeon_dpm_get_mclk(rdev, true) * 10;
2357 			wm_low.sclk =
2358 				radeon_dpm_get_sclk(rdev, true) * 10;
2359 		} else {
2360 			wm_low.yclk = rdev->pm.current_mclk * 10;
2361 			wm_low.sclk = rdev->pm.current_sclk * 10;
2362 		}
2363 
2364 		wm_low.disp_clk = mode->clock;
2365 		wm_low.src_width = mode->crtc_hdisplay;
2366 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2367 		wm_low.blank_time = line_time - wm_low.active_time;
2368 		wm_low.interlaced = false;
2369 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2370 			wm_low.interlaced = true;
2371 		wm_low.vsc = radeon_crtc->vsc;
2372 		wm_low.vtaps = 1;
2373 		if (radeon_crtc->rmx_type != RMX_OFF)
2374 			wm_low.vtaps = 2;
2375 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2376 		wm_low.lb_size = lb_size;
2377 		wm_low.dram_channels = dram_channels;
2378 		wm_low.num_heads = num_heads;
2379 
2380 		/* set for high clocks */
2381 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2382 		/* set for low clocks */
2383 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2384 
2385 		/* possibly force display priority to high */
2386 		/* should really do this at mode validation time... */
2387 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2388 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2389 		    !dce6_check_latency_hiding(&wm_high) ||
2390 		    (rdev->disp_priority == 2)) {
2391 			DRM_DEBUG_KMS("force priority to high\n");
2392 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2393 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2394 		}
2395 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2396 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2397 		    !dce6_check_latency_hiding(&wm_low) ||
2398 		    (rdev->disp_priority == 2)) {
2399 			DRM_DEBUG_KMS("force priority to high\n");
2400 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2401 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2402 		}
2403 
2404 		a.full = dfixed_const(1000);
2405 		b.full = dfixed_const(mode->clock);
2406 		b.full = dfixed_div(b, a);
2407 		c.full = dfixed_const(latency_watermark_a);
2408 		c.full = dfixed_mul(c, b);
2409 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2410 		c.full = dfixed_div(c, a);
2411 		a.full = dfixed_const(16);
2412 		c.full = dfixed_div(c, a);
2413 		priority_a_mark = dfixed_trunc(c);
2414 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2415 
2416 		a.full = dfixed_const(1000);
2417 		b.full = dfixed_const(mode->clock);
2418 		b.full = dfixed_div(b, a);
2419 		c.full = dfixed_const(latency_watermark_b);
2420 		c.full = dfixed_mul(c, b);
2421 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2422 		c.full = dfixed_div(c, a);
2423 		a.full = dfixed_const(16);
2424 		c.full = dfixed_div(c, a);
2425 		priority_b_mark = dfixed_trunc(c);
2426 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2427 
2428 		/* Save number of lines the linebuffer leads before the scanout */
2429 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2430 	}
2431 
2432 	/* select wm A */
2433 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2434 	tmp = arb_control3;
2435 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2436 	tmp |= LATENCY_WATERMARK_MASK(1);
2437 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2438 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2439 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2440 		LATENCY_HIGH_WATERMARK(line_time)));
2441 	/* select wm B */
2442 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2443 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2444 	tmp |= LATENCY_WATERMARK_MASK(2);
2445 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2446 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2447 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2448 		LATENCY_HIGH_WATERMARK(line_time)));
2449 	/* restore original selection */
2450 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2451 
2452 	/* write the priority marks */
2453 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2454 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2455 
2456 	/* save values for DPM */
2457 	radeon_crtc->line_time = line_time;
2458 	radeon_crtc->wm_high = latency_watermark_a;
2459 	radeon_crtc->wm_low = latency_watermark_b;
2460 }
2461 
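/**
 * dce6_bandwidth_update - program display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Calculate and program the display watermarks and line
 * buffer allocation (DCE6).
 */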
2462 void dce6_bandwidth_update(struct radeon_device *rdev)
2463 {
2464 	struct drm_display_mode *mode0 = NULL;
2465 	struct drm_display_mode *mode1 = NULL;
2466 	u32 num_heads = 0, lb_size;
2467 	int i;
2468 
2469 	if (!rdev->mode_info.mode_config_initialized)
2470 		return;
2471 
2472 	radeon_update_display_priority(rdev);
2473 
2474 	for (i = 0; i < rdev->num_crtc; i++) {
2475 		if (rdev->mode_info.crtcs[i]->base.enabled)
2476 			num_heads++;
2477 	}
2478 	for (i = 0; i < rdev->num_crtc; i += 2) {
2479 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2480 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2481 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2482 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2483 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2484 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2485 	}
2486 }
2487 
2488 /*
2489  * Core functions
2490  */
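/* program the GB_TILE_MODE registers with the per-ASIC tiling mode table */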
2491 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2492 {
2493 	u32 *tile = rdev->config.si.tile_mode_array;
2494 	const u32 num_tile_mode_states =
2495 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2496 	u32 reg_offset, split_equal_to_row_size;
2497 
2498 	switch (rdev->config.si.mem_row_size_in_kb) {
2499 	case 1:
2500 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2501 		break;
2502 	case 2:
2503 	default:
2504 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2505 		break;
2506 	case 4:
2507 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2508 		break;
2509 	}
2510 
2511 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2512 		tile[reg_offset] = 0;
2513 
2514 	switch (rdev->family) {
2515 	case CHIP_TAHITI:
2516 	case CHIP_PITCAIRN:
2517 		/* non-AA compressed depth or any compressed stencil */
2518 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2519 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2520 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2521 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2522 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2523 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2525 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2526 		/* 2xAA/4xAA compressed depth only */
2527 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2528 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2529 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2530 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2531 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2532 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2534 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2535 		/* 8xAA compressed depth only */
2536 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2538 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2539 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2540 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2541 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2542 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2543 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2544 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2545 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2547 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2548 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2549 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2550 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2552 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2553 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2554 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2555 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2556 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2557 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2558 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2559 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2561 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2562 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2563 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2565 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2566 			   TILE_SPLIT(split_equal_to_row_size) |
2567 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2568 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2569 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2570 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2571 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2572 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2573 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2574 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2575 			   TILE_SPLIT(split_equal_to_row_size) |
2576 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2577 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2579 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2580 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2581 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2582 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2583 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2584 			   TILE_SPLIT(split_equal_to_row_size) |
2585 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2586 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2587 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2588 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2589 		/* 1D and 1D Array Surfaces */
2590 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2591 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2592 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2593 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2594 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2595 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2597 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2598 		/* Displayable maps. */
2599 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2600 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2601 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2602 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2603 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2604 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2606 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2607 		/* Display 8bpp. */
2608 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2609 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2610 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2611 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2612 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2613 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2614 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2615 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2616 		/* Display 16bpp. */
2617 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2619 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2620 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2621 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2622 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2624 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2625 		/* Display 32bpp. */
2626 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2627 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2628 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2629 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2630 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2631 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2634 		/* Thin. */
2635 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2636 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2637 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2638 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2639 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2640 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2642 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2643 		/* Thin 8 bpp. */
2644 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2645 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2646 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2647 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2648 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2649 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2651 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2652 		/* Thin 16 bpp. */
2653 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2655 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2656 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2657 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2658 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2660 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2661 		/* Thin 32 bpp. */
2662 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2663 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2664 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2665 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2666 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2667 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2669 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2670 		/* Thin 64 bpp. */
2671 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2673 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2674 			   TILE_SPLIT(split_equal_to_row_size) |
2675 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2676 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2678 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2679 		/* 8 bpp PRT. */
2680 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2682 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2683 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2684 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2685 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2686 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2687 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2688 		/* 16 bpp PRT */
2689 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2690 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2691 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2692 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2693 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2694 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2695 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2696 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2697 		/* 32 bpp PRT */
2698 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2700 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2701 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2702 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2703 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2705 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2706 		/* 64 bpp PRT */
2707 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2709 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2710 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2711 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2712 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2713 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2714 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2715 		/* 128 bpp PRT */
2716 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2717 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2718 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2719 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2720 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2721 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2723 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2724 
2725 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2726 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2727 		break;
2728 
2729 	case CHIP_VERDE:
2730 	case CHIP_OLAND:
2731 	case CHIP_HAINAN:
2732 		/* non-AA compressed depth or any compressed stencil */
2733 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2734 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2735 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2736 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2737 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2738 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2740 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2741 		/* 2xAA/4xAA compressed depth only */
2742 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2743 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2744 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2745 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2746 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2747 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2748 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2749 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2750 		/* 8xAA compressed depth only */
2751 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2752 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2753 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2755 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2756 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2758 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2759 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2760 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2762 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2764 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2765 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2766 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2767 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2768 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2769 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2770 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2771 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2773 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2774 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2776 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2777 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2778 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2779 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2780 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2781 			   TILE_SPLIT(split_equal_to_row_size) |
2782 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2783 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2784 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2785 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2786 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2787 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2788 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2789 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2790 			   TILE_SPLIT(split_equal_to_row_size) |
2791 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2792 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2794 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2795 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2796 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2798 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799 			   TILE_SPLIT(split_equal_to_row_size) |
2800 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2801 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2802 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2803 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2804 		/* 1D and 1D Array Surfaces */
2805 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2806 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2807 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2808 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2809 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2810 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2811 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2812 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2813 		/* Displayable maps. */
2814 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2815 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2816 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2817 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2818 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2819 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2820 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2821 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2822 		/* Display 8bpp. */
2823 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2824 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2825 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2826 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2827 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2828 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2830 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2831 		/* Display 16bpp. */
2832 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2833 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2834 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2835 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2836 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2837 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2839 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2840 		/* Display 32bpp. */
2841 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2842 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2843 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2844 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2845 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2846 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2847 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2848 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2849 		/* Thin. */
2850 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2851 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2852 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2853 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2854 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2855 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2856 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2857 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2858 		/* Thin 8 bpp. */
2859 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2860 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2861 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2862 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2863 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2864 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2867 		/* Thin 16 bpp. */
2868 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2869 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2870 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2871 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2872 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2873 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2874 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2875 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2876 		/* Thin 32 bpp. */
2877 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2879 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2880 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2881 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2882 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2883 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2884 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2885 		/* Thin 64 bpp. */
2886 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2887 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2888 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2889 			   TILE_SPLIT(split_equal_to_row_size) |
2890 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2891 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2892 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2893 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2894 		/* 8 bpp PRT. */
2895 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2897 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2898 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2899 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2900 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2901 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2902 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2903 		/* 16 bpp PRT */
2904 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2906 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2907 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2908 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2909 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2910 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2911 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2912 		/* 32 bpp PRT */
2913 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2915 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2916 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2917 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2918 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2919 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2920 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2921 		/* 64 bpp PRT */
2922 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2923 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2924 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2925 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2926 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2927 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2928 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2929 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2930 		/* 128 bpp PRT */
2931 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2932 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2933 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2934 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2935 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2936 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2937 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2938 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2939 
2940 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2941 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2942 		break;
2943 
2944 	default:
2945 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2946 	}
2947 }
2948 
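/* steer banked register accesses to a given SE/SH via GRBM_GFX_INDEX;
 * 0xffffffff for se_num or sh_num means broadcast to all instances
 */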
2949 static void si_select_se_sh(struct radeon_device *rdev,
2950 			    u32 se_num, u32 sh_num)
2951 {
2952 	u32 data = INSTANCE_BROADCAST_WRITES;
2953 
2954 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2955 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2956 	else if (se_num == 0xffffffff)
2957 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2958 	else if (sh_num == 0xffffffff)
2959 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2960 	else
2961 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2962 	WREG32(GRBM_GFX_INDEX, data);
2963 }
2964 
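/* return a mask with the bit_width least significant bits set */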
2965 static u32 si_create_bitmask(u32 bit_width)
2966 {
2967 	u32 i, mask = 0;
2968 
2969 	for (i = 0; i < bit_width; i++) {
2970 		mask <<= 1;
2971 		mask |= 1;
2972 	}
2973 	return mask;
2974 }
2975 
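/* return the mask of active CUs in the currently selected shader array */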
2976 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2977 {
2978 	u32 data, mask;
2979 
2980 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2981 	if (data & 1)
2982 		data &= INACTIVE_CUS_MASK;
2983 	else
2984 		data = 0;
2985 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2986 
2987 	data >>= INACTIVE_CUS_SHIFT;
2988 
2989 	mask = si_create_bitmask(cu_per_sh);
2990 
2991 	return ~data & mask;
2992 }
2993 
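/* for each SE/SH, mask the first active CU out of SPI_STATIC_THREAD_MGMT_3 */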
2994 static void si_setup_spi(struct radeon_device *rdev,
2995 			 u32 se_num, u32 sh_per_se,
2996 			 u32 cu_per_sh)
2997 {
2998 	int i, j, k;
2999 	u32 data, mask, active_cu;
3000 
3001 	for (i = 0; i < se_num; i++) {
3002 		for (j = 0; j < sh_per_se; j++) {
3003 			si_select_se_sh(rdev, i, j);
3004 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
3005 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
3006 
3007 			mask = 1;
3008 			for (k = 0; k < 16; k++) {
3009 				mask <<= k;
3010 				if (active_cu & mask) {
3011 					data &= ~mask;
3012 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3013 					break;
3014 				}
3015 			}
3016 		}
3017 	}
3018 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3019 }
3020 
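/* return the mask of disabled RBs for the currently selected SE/SH */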
3021 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3022 			      u32 max_rb_num_per_se,
3023 			      u32 sh_per_se)
3024 {
3025 	u32 data, mask;
3026 
3027 	data = RREG32(CC_RB_BACKEND_DISABLE);
3028 	if (data & 1)
3029 		data &= BACKEND_DISABLE_MASK;
3030 	else
3031 		data = 0;
3032 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3033 
3034 	data >>= BACKEND_DISABLE_SHIFT;
3035 
3036 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3037 
3038 	return data & mask;
3039 }
3040 
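/* build the global enabled-RB mask from the per-SE/SH disable state and
 * program the raster configuration for each shader engine
 */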
3041 static void si_setup_rb(struct radeon_device *rdev,
3042 			u32 se_num, u32 sh_per_se,
3043 			u32 max_rb_num_per_se)
3044 {
3045 	int i, j;
3046 	u32 data, mask;
3047 	u32 disabled_rbs = 0;
3048 	u32 enabled_rbs = 0;
3049 
3050 	for (i = 0; i < se_num; i++) {
3051 		for (j = 0; j < sh_per_se; j++) {
3052 			si_select_se_sh(rdev, i, j);
3053 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3054 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3055 		}
3056 	}
3057 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3058 
3059 	mask = 1;
3060 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3061 		if (!(disabled_rbs & mask))
3062 			enabled_rbs |= mask;
3063 		mask <<= 1;
3064 	}
3065 
3066 	rdev->config.si.backend_enable_mask = enabled_rbs;
3067 
3068 	for (i = 0; i < se_num; i++) {
3069 		si_select_se_sh(rdev, i, 0xffffffff);
3070 		data = 0;
3071 		for (j = 0; j < sh_per_se; j++) {
3072 			switch (enabled_rbs & 3) {
3073 			case 1:
3074 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3075 				break;
3076 			case 2:
3077 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3078 				break;
3079 			case 3:
3080 			default:
3081 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3082 				break;
3083 			}
3084 			enabled_rbs >>= 2;
3085 		}
3086 		WREG32(PA_SC_RASTER_CONFIG, data);
3087 	}
3088 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3089 }
3090 
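/* set up the gfx engine: per-ASIC limits, tiling tables, RB and CU config */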
3091 static void si_gpu_init(struct radeon_device *rdev)
3092 {
3093 	u32 gb_addr_config = 0;
3094 	u32 mc_shared_chmap, mc_arb_ramcfg;
3095 	u32 sx_debug_1;
3096 	u32 hdp_host_path_cntl;
3097 	u32 tmp;
3098 	int i, j;
3099 
3100 	switch (rdev->family) {
3101 	case CHIP_TAHITI:
3102 		rdev->config.si.max_shader_engines = 2;
3103 		rdev->config.si.max_tile_pipes = 12;
3104 		rdev->config.si.max_cu_per_sh = 8;
3105 		rdev->config.si.max_sh_per_se = 2;
3106 		rdev->config.si.max_backends_per_se = 4;
3107 		rdev->config.si.max_texture_channel_caches = 12;
3108 		rdev->config.si.max_gprs = 256;
3109 		rdev->config.si.max_gs_threads = 32;
3110 		rdev->config.si.max_hw_contexts = 8;
3111 
3112 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3113 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3114 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3115 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3116 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3117 		break;
3118 	case CHIP_PITCAIRN:
3119 		rdev->config.si.max_shader_engines = 2;
3120 		rdev->config.si.max_tile_pipes = 8;
3121 		rdev->config.si.max_cu_per_sh = 5;
3122 		rdev->config.si.max_sh_per_se = 2;
3123 		rdev->config.si.max_backends_per_se = 4;
3124 		rdev->config.si.max_texture_channel_caches = 8;
3125 		rdev->config.si.max_gprs = 256;
3126 		rdev->config.si.max_gs_threads = 32;
3127 		rdev->config.si.max_hw_contexts = 8;
3128 
3129 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3130 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3131 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3132 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3133 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3134 		break;
3135 	case CHIP_VERDE:
3136 	default:
3137 		rdev->config.si.max_shader_engines = 1;
3138 		rdev->config.si.max_tile_pipes = 4;
3139 		rdev->config.si.max_cu_per_sh = 5;
3140 		rdev->config.si.max_sh_per_se = 2;
3141 		rdev->config.si.max_backends_per_se = 4;
3142 		rdev->config.si.max_texture_channel_caches = 4;
3143 		rdev->config.si.max_gprs = 256;
3144 		rdev->config.si.max_gs_threads = 32;
3145 		rdev->config.si.max_hw_contexts = 8;
3146 
3147 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3148 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3149 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3150 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3151 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3152 		break;
3153 	case CHIP_OLAND:
3154 		rdev->config.si.max_shader_engines = 1;
3155 		rdev->config.si.max_tile_pipes = 4;
3156 		rdev->config.si.max_cu_per_sh = 6;
3157 		rdev->config.si.max_sh_per_se = 1;
3158 		rdev->config.si.max_backends_per_se = 2;
3159 		rdev->config.si.max_texture_channel_caches = 4;
3160 		rdev->config.si.max_gprs = 256;
3161 		rdev->config.si.max_gs_threads = 16;
3162 		rdev->config.si.max_hw_contexts = 8;
3163 
3164 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3165 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3166 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3167 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3168 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3169 		break;
3170 	case CHIP_HAINAN:
3171 		rdev->config.si.max_shader_engines = 1;
3172 		rdev->config.si.max_tile_pipes = 4;
3173 		rdev->config.si.max_cu_per_sh = 5;
3174 		rdev->config.si.max_sh_per_se = 1;
3175 		rdev->config.si.max_backends_per_se = 1;
3176 		rdev->config.si.max_texture_channel_caches = 2;
3177 		rdev->config.si.max_gprs = 256;
3178 		rdev->config.si.max_gs_threads = 16;
3179 		rdev->config.si.max_hw_contexts = 8;
3180 
3181 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3182 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3183 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3184 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3185 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3186 		break;
3187 	}
3188 
3189 	/* Initialize HDP */
3190 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3191 		WREG32((0x2c14 + j), 0x00000000);
3192 		WREG32((0x2c18 + j), 0x00000000);
3193 		WREG32((0x2c1c + j), 0x00000000);
3194 		WREG32((0x2c20 + j), 0x00000000);
3195 		WREG32((0x2c24 + j), 0x00000000);
3196 	}
3197 
3198 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3199 	WREG32(SRBM_INT_CNTL, 1);
3200 	WREG32(SRBM_INT_ACK, 1);
3201 
3202 	evergreen_fix_pci_max_read_req_size(rdev);
3203 
3204 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3205 
3206 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3207 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3208 
3209 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3210 	rdev->config.si.mem_max_burst_length_bytes = 256;
3211 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3212 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3213 	if (rdev->config.si.mem_row_size_in_kb > 4)
3214 		rdev->config.si.mem_row_size_in_kb = 4;
3215 	/* XXX use MC settings? */
3216 	rdev->config.si.shader_engine_tile_size = 32;
3217 	rdev->config.si.num_gpus = 1;
3218 	rdev->config.si.multi_gpu_tile_size = 64;
3219 
3220 	/* fix up row size */
3221 	gb_addr_config &= ~ROW_SIZE_MASK;
3222 	switch (rdev->config.si.mem_row_size_in_kb) {
3223 	case 1:
3224 	default:
3225 		gb_addr_config |= ROW_SIZE(0);
3226 		break;
3227 	case 2:
3228 		gb_addr_config |= ROW_SIZE(1);
3229 		break;
3230 	case 4:
3231 		gb_addr_config |= ROW_SIZE(2);
3232 		break;
3233 	}
3234 
3235 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3236 	 * not have bank info, so create a custom tiling dword.
3237 	 * bits 3:0   num_pipes
3238 	 * bits 7:4   num_banks
3239 	 * bits 11:8  group_size
3240 	 * bits 15:12 row_size
3241 	 */
3242 	rdev->config.si.tile_config = 0;
3243 	switch (rdev->config.si.num_tile_pipes) {
3244 	case 1:
3245 		rdev->config.si.tile_config |= (0 << 0);
3246 		break;
3247 	case 2:
3248 		rdev->config.si.tile_config |= (1 << 0);
3249 		break;
3250 	case 4:
3251 		rdev->config.si.tile_config |= (2 << 0);
3252 		break;
3253 	case 8:
3254 	default:
3255 		/* XXX what about 12? */
3256 		rdev->config.si.tile_config |= (3 << 0);
3257 		break;
3258 	}
3259 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3260 	case 0: /* four banks */
3261 		rdev->config.si.tile_config |= 0 << 4;
3262 		break;
3263 	case 1: /* eight banks */
3264 		rdev->config.si.tile_config |= 1 << 4;
3265 		break;
3266 	case 2: /* sixteen banks */
3267 	default:
3268 		rdev->config.si.tile_config |= 2 << 4;
3269 		break;
3270 	}
3271 	rdev->config.si.tile_config |=
3272 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3273 	rdev->config.si.tile_config |=
3274 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3275 
3276 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3277 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3278 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3279 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3280 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3281 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3282 	if (rdev->has_uvd) {
3283 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3284 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3285 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3286 	}
3287 
3288 	si_tiling_mode_table_init(rdev);
3289 
3290 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3291 		    rdev->config.si.max_sh_per_se,
3292 		    rdev->config.si.max_backends_per_se);
3293 
3294 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3295 		     rdev->config.si.max_sh_per_se,
3296 		     rdev->config.si.max_cu_per_sh);
3297 
3298 	rdev->config.si.active_cus = 0;
3299 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3300 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3301 			rdev->config.si.active_cus +=
3302 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3303 		}
3304 	}
3305 
3306 	/* set HW defaults for 3D engine */
3307 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3308 				     ROQ_IB2_START(0x2b)));
3309 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3310 
3311 	sx_debug_1 = RREG32(SX_DEBUG_1);
3312 	WREG32(SX_DEBUG_1, sx_debug_1);
3313 
3314 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3315 
3316 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3317 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3318 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3319 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3320 
3321 	WREG32(VGT_NUM_INSTANCES, 1);
3322 
3323 	WREG32(CP_PERFMON_CNTL, 0);
3324 
3325 	WREG32(SQ_CONFIG, 0);
3326 
3327 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3328 					  FORCE_EOV_MAX_REZ_CNT(255)));
3329 
3330 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3331 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3332 
3333 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3334 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3335 
3336 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3337 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3338 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3339 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3340 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3341 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3342 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3343 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3344 
3345 	tmp = RREG32(HDP_MISC_CNTL);
3346 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3347 	WREG32(HDP_MISC_CNTL, tmp);
3348 
3349 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3350 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3351 
3352 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3353 
3354 	udelay(50);
3355 }
3356 
3357 /*
3358  * GPU scratch register helper functions.
3359  */
3360 static void si_scratch_init(struct radeon_device *rdev)
3361 {
3362 	int i;
3363 
3364 	rdev->scratch.num_reg = 7;
3365 	rdev->scratch.reg_base = SCRATCH_REG0;
3366 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3367 		rdev->scratch.free[i] = true;
3368 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3369 	}
3370 }
3371 
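/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Flush the GPU caches and emit an EVENT_WRITE_EOP packet that
 * writes the fence sequence number and raises an interrupt (SI).
 */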
3372 void si_fence_ring_emit(struct radeon_device *rdev,
3373 			struct radeon_fence *fence)
3374 {
3375 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3376 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3377 
3378 	/* flush read cache over gart */
3379 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3380 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3381 	radeon_ring_write(ring, 0);
3382 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3383 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3384 			  PACKET3_TC_ACTION_ENA |
3385 			  PACKET3_SH_KCACHE_ACTION_ENA |
3386 			  PACKET3_SH_ICACHE_ACTION_ENA);
3387 	radeon_ring_write(ring, 0xFFFFFFFF);
3388 	radeon_ring_write(ring, 0);
3389 	radeon_ring_write(ring, 10); /* poll interval */
3390 	/* EVENT_WRITE_EOP - flush caches, send int */
3391 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3392 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3393 	radeon_ring_write(ring, lower_32_bits(addr));
3394 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3395 	radeon_ring_write(ring, fence->seq);
3396 	radeon_ring_write(ring, 0);
3397 }
3398 
3399 /*
3400  * IB stuff
3401  */
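/**
 * si_ring_ib_execute - schedule an IB on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emit an INDIRECT_BUFFER packet for the IB, saving the next
 * read pointer and flushing the read caches for the IB's VM id
 * when a regular (non-const) IB is submitted (SI).
 */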
3402 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3403 {
3404 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3405 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3406 	u32 header;
3407 
3408 	if (ib->is_const_ib) {
3409 		/* set switch buffer packet before const IB */
3410 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3411 		radeon_ring_write(ring, 0);
3412 
3413 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3414 	} else {
3415 		u32 next_rptr;
3416 		if (ring->rptr_save_reg) {
3417 			next_rptr = ring->wptr + 3 + 4 + 8;
3418 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3419 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3420 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3421 			radeon_ring_write(ring, next_rptr);
3422 		} else if (rdev->wb.enabled) {
3423 			next_rptr = ring->wptr + 5 + 4 + 8;
3424 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3425 			radeon_ring_write(ring, (1 << 8));
3426 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3427 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3428 			radeon_ring_write(ring, next_rptr);
3429 		}
3430 
3431 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3432 	}
3433 
3434 	radeon_ring_write(ring, header);
3435 	radeon_ring_write(ring,
3436 #ifdef __BIG_ENDIAN
3437 			  (2 << 0) |
3438 #endif
3439 			  (ib->gpu_addr & 0xFFFFFFFC));
3440 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3441 	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3442 
3443 	if (!ib->is_const_ib) {
3444 		/* flush read cache over gart for this vmid */
3445 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3446 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3447 		radeon_ring_write(ring, vm_id);
3448 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3449 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3450 				  PACKET3_TC_ACTION_ENA |
3451 				  PACKET3_SH_KCACHE_ACTION_ENA |
3452 				  PACKET3_SH_ICACHE_ACTION_ENA);
3453 		radeon_ring_write(ring, 0xFFFFFFFF);
3454 		radeon_ring_write(ring, 0);
3455 		radeon_ring_write(ring, 10); /* poll interval */
3456 	}
3457 }
3458 
3459 /*
3460  * CP.
3461  */
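/**
 * si_cp_enable - enable/disable the CP micro engines
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halt or unhalt the PFP, ME and CE micro engines; when
 * disabling, also mark the gfx and compute rings not ready (SI).
 */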
3462 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3463 {
3464 	if (enable) {
3465 		WREG32(CP_ME_CNTL, 0);
3466 	} else {
3467 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3468 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3469 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3470 		WREG32(SCRATCH_UMSK, 0);
3471 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3472 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3473 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3474 	}
3475 	udelay(50);
3476 }
3477 
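/**
 * si_cp_load_microcode - load the CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Halt the CP and load the PFP, CE and ME microcode into their
 * ucode RAMs, handling both the new and legacy firmware
 * layouts (SI).
 * Returns 0 on success, -EINVAL if the firmware is not loaded.
 */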
3478 static int si_cp_load_microcode(struct radeon_device *rdev)
3479 {
3480 	int i;
3481 
3482 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3483 		return -EINVAL;
3484 
3485 	si_cp_enable(rdev, false);
3486 
3487 	if (rdev->new_fw) {
3488 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3489 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3490 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3491 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3492 		const struct gfx_firmware_header_v1_0 *me_hdr =
3493 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3494 		const __le32 *fw_data;
3495 		u32 fw_size;
3496 
3497 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3498 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3499 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3500 
3501 		/* PFP */
3502 		fw_data = (const __le32 *)
3503 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3504 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3505 		WREG32(CP_PFP_UCODE_ADDR, 0);
3506 		for (i = 0; i < fw_size; i++)
3507 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3508 		WREG32(CP_PFP_UCODE_ADDR, 0);
3509 
3510 		/* CE */
3511 		fw_data = (const __le32 *)
3512 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3513 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3514 		WREG32(CP_CE_UCODE_ADDR, 0);
3515 		for (i = 0; i < fw_size; i++)
3516 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3517 		WREG32(CP_CE_UCODE_ADDR, 0);
3518 
3519 		/* ME */
3520 		fw_data = (const __le32 *)
3521 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3522 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3523 		WREG32(CP_ME_RAM_WADDR, 0);
3524 		for (i = 0; i < fw_size; i++)
3525 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3526 		WREG32(CP_ME_RAM_WADDR, 0);
3527 	} else {
3528 		const __be32 *fw_data;
3529 
3530 		/* PFP */
3531 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3532 		WREG32(CP_PFP_UCODE_ADDR, 0);
3533 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3534 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3535 		WREG32(CP_PFP_UCODE_ADDR, 0);
3536 
3537 		/* CE */
3538 		fw_data = (const __be32 *)rdev->ce_fw->data;
3539 		WREG32(CP_CE_UCODE_ADDR, 0);
3540 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3541 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3542 		WREG32(CP_CE_UCODE_ADDR, 0);
3543 
3544 		/* ME */
3545 		fw_data = (const __be32 *)rdev->me_fw->data;
3546 		WREG32(CP_ME_RAM_WADDR, 0);
3547 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3548 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3549 		WREG32(CP_ME_RAM_WADDR, 0);
3550 	}
3551 
3552 	WREG32(CP_PFP_UCODE_ADDR, 0);
3553 	WREG32(CP_CE_UCODE_ADDR, 0);
3554 	WREG32(CP_ME_RAM_WADDR, 0);
3555 	WREG32(CP_ME_RAM_RADDR, 0);
3556 	return 0;
3557 }
3558 
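/**
 * si_cp_start - initialize the CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Run the ME initialization sequence, set up the CE partitions,
 * emit the clear state, and clear the compute context state on
 * both compute rings (SI).
 * Returns 0 on success, error on failure.
 */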
3559 static int si_cp_start(struct radeon_device *rdev)
3560 {
3561 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3562 	int r, i;
3563 
3564 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3565 	if (r) {
3566 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3567 		return r;
3568 	}
3569 	/* init the CP */
3570 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3571 	radeon_ring_write(ring, 0x1);
3572 	radeon_ring_write(ring, 0x0);
3573 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3574 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3575 	radeon_ring_write(ring, 0);
3576 	radeon_ring_write(ring, 0);
3577 
3578 	/* init the CE partitions */
3579 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3580 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3581 	radeon_ring_write(ring, 0xc000);
3582 	radeon_ring_write(ring, 0xe000);
3583 	radeon_ring_unlock_commit(rdev, ring, false);
3584 
3585 	si_cp_enable(rdev, true);
3586 
3587 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3588 	if (r) {
3589 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3590 		return r;
3591 	}
3592 
3593 	/* setup clear context state */
3594 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3595 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3596 
3597 	for (i = 0; i < si_default_size; i++)
3598 		radeon_ring_write(ring, si_default_state[i]);
3599 
3600 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3601 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3602 
3603 	/* set clear context state */
3604 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3605 	radeon_ring_write(ring, 0);
3606 
3607 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3608 	radeon_ring_write(ring, 0x00000316);
3609 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3610 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3611 
3612 	radeon_ring_unlock_commit(rdev, ring, false);
3613 
3614 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3615 		ring = &rdev->ring[i];
3616 		r = radeon_ring_lock(rdev, ring, 2);
		if (r) {
			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
			return r;
		}
3617 
3618 		/* clear the compute context state */
3619 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3620 		radeon_ring_write(ring, 0);
3621 
3622 		radeon_ring_unlock_commit(rdev, ring, false);
3623 	}
3624 
3625 	return 0;
3626 }
3627 
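/**
 * si_cp_fini - tear down the CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Halt the CP and free the gfx and compute rings along with
 * their rptr save scratch registers (SI).
 */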
3628 static void si_cp_fini(struct radeon_device *rdev)
3629 {
3630 	struct radeon_ring *ring;
3631 	si_cp_enable(rdev, false);
3632 
3633 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3634 	radeon_ring_fini(rdev, ring);
3635 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3636 
3637 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3638 	radeon_ring_fini(rdev, ring);
3639 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3640 
3641 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3642 	radeon_ring_fini(rdev, ring);
3643 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3644 }
3645 
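/**
 * si_cp_resume - set up and start the CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Program the ring buffer control registers for the gfx ring and
 * both compute rings, start the CP, and ring test all three
 * rings (SI).
 * Returns 0 on success, error on failure.
 */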
3646 static int si_cp_resume(struct radeon_device *rdev)
3647 {
3648 	struct radeon_ring *ring;
3649 	u32 tmp;
3650 	u32 rb_bufsz;
3651 	int r;
3652 
3653 	si_enable_gui_idle_interrupt(rdev, false);
3654 
3655 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3656 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3657 
3658 	/* Set the write pointer delay */
3659 	WREG32(CP_RB_WPTR_DELAY, 0);
3660 
3661 	WREG32(CP_DEBUG, 0);
3662 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3663 
3664 	/* ring 0 - compute and gfx */
3665 	/* Set ring buffer size */
3666 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3667 	rb_bufsz = order_base_2(ring->ring_size / 8);
3668 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3669 #ifdef __BIG_ENDIAN
3670 	tmp |= BUF_SWAP_32BIT;
3671 #endif
3672 	WREG32(CP_RB0_CNTL, tmp);
3673 
3674 	/* Initialize the ring buffer's read and write pointers */
3675 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3676 	ring->wptr = 0;
3677 	WREG32(CP_RB0_WPTR, ring->wptr);
3678 
3679 	/* set the wb address whether it's enabled or not */
3680 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3681 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3682 
3683 	if (rdev->wb.enabled) {
3684 		WREG32(SCRATCH_UMSK, 0xff);
3685 	} else {
3686 		tmp |= RB_NO_UPDATE;
3687 		WREG32(SCRATCH_UMSK, 0);
3688 	}
3689 
3690 	mdelay(1);
3691 	WREG32(CP_RB0_CNTL, tmp);
3692 
3693 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3694 
3695 	/* ring1 - compute only */
3696 	/* Set ring buffer size */
3697 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3698 	rb_bufsz = order_base_2(ring->ring_size / 8);
3699 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3700 #ifdef __BIG_ENDIAN
3701 	tmp |= BUF_SWAP_32BIT;
3702 #endif
3703 	WREG32(CP_RB1_CNTL, tmp);
3704 
3705 	/* Initialize the ring buffer's read and write pointers */
3706 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3707 	ring->wptr = 0;
3708 	WREG32(CP_RB1_WPTR, ring->wptr);
3709 
3710 	/* set the wb address whether it's enabled or not */
3711 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3712 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3713 
3714 	mdelay(1);
3715 	WREG32(CP_RB1_CNTL, tmp);
3716 
3717 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3718 
3719 	/* ring2 - compute only */
3720 	/* Set ring buffer size */
3721 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3722 	rb_bufsz = order_base_2(ring->ring_size / 8);
3723 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3724 #ifdef __BIG_ENDIAN
3725 	tmp |= BUF_SWAP_32BIT;
3726 #endif
3727 	WREG32(CP_RB2_CNTL, tmp);
3728 
3729 	/* Initialize the ring buffer's read and write pointers */
3730 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3731 	ring->wptr = 0;
3732 	WREG32(CP_RB2_WPTR, ring->wptr);
3733 
3734 	/* set the wb address whether it's enabled or not */
3735 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3736 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3737 
3738 	mdelay(1);
3739 	WREG32(CP_RB2_CNTL, tmp);
3740 
3741 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3742 
3743 	/* start the rings */
3744 	r = si_cp_start(rdev);
	if (r)
		return r;
3745 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3746 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3747 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3748 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3749 	if (r) {
3750 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3751 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3752 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3753 		return r;
3754 	}
3755 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3756 	if (r) {
3757 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3758 	}
3759 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3760 	if (r) {
3761 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3762 	}
3763 
3764 	si_enable_gui_idle_interrupt(rdev, true);
3765 
3766 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3767 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3768 
3769 	return 0;
3770 }
3771 
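/**
 * si_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Inspect the GRBM, SRBM, DMA and VM L2 status registers and
 * build a mask of the blocks that appear to be hung (SI).
 * Returns the reset mask for use by si_gpu_soft_reset().
 */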
3772 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3773 {
3774 	u32 reset_mask = 0;
3775 	u32 tmp;
3776 
3777 	/* GRBM_STATUS */
3778 	tmp = RREG32(GRBM_STATUS);
3779 	if (tmp & (PA_BUSY | SC_BUSY |
3780 		   BCI_BUSY | SX_BUSY |
3781 		   TA_BUSY | VGT_BUSY |
3782 		   DB_BUSY | CB_BUSY |
3783 		   GDS_BUSY | SPI_BUSY |
3784 		   IA_BUSY | IA_BUSY_NO_DMA))
3785 		reset_mask |= RADEON_RESET_GFX;
3786 
3787 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3788 		   CP_BUSY | CP_COHERENCY_BUSY))
3789 		reset_mask |= RADEON_RESET_CP;
3790 
3791 	if (tmp & GRBM_EE_BUSY)
3792 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3793 
3794 	/* GRBM_STATUS2 */
3795 	tmp = RREG32(GRBM_STATUS2);
3796 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3797 		reset_mask |= RADEON_RESET_RLC;
3798 
3799 	/* DMA_STATUS_REG 0 */
3800 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3801 	if (!(tmp & DMA_IDLE))
3802 		reset_mask |= RADEON_RESET_DMA;
3803 
3804 	/* DMA_STATUS_REG 1 */
3805 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3806 	if (!(tmp & DMA_IDLE))
3807 		reset_mask |= RADEON_RESET_DMA1;
3808 
3809 	/* SRBM_STATUS2 */
3810 	tmp = RREG32(SRBM_STATUS2);
3811 	if (tmp & DMA_BUSY)
3812 		reset_mask |= RADEON_RESET_DMA;
3813 
3814 	if (tmp & DMA1_BUSY)
3815 		reset_mask |= RADEON_RESET_DMA1;
3816 
3817 	/* SRBM_STATUS */
3818 	tmp = RREG32(SRBM_STATUS);
3819 
3820 	if (tmp & IH_BUSY)
3821 		reset_mask |= RADEON_RESET_IH;
3822 
3823 	if (tmp & SEM_BUSY)
3824 		reset_mask |= RADEON_RESET_SEM;
3825 
3826 	if (tmp & GRBM_RQ_PENDING)
3827 		reset_mask |= RADEON_RESET_GRBM;
3828 
3829 	if (tmp & VMC_BUSY)
3830 		reset_mask |= RADEON_RESET_VMC;
3831 
3832 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3833 		   MCC_BUSY | MCD_BUSY))
3834 		reset_mask |= RADEON_RESET_MC;
3835 
3836 	if (evergreen_is_display_hung(rdev))
3837 		reset_mask |= RADEON_RESET_DISPLAY;
3838 
3839 	/* VM_L2_STATUS */
3840 	tmp = RREG32(VM_L2_STATUS);
3841 	if (tmp & L2_BUSY)
3842 		reset_mask |= RADEON_RESET_VMC;
3843 
3844 	/* Skip MC reset as it's most likely not hung, just busy */
3845 	if (reset_mask & RADEON_RESET_MC) {
3846 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3847 		reset_mask &= ~RADEON_RESET_MC;
3848 	}
3849 
3850 	return reset_mask;
3851 }
3852 
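/**
 * si_gpu_soft_reset - soft reset the requested blocks
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of blocks to reset
 *
 * Stop the RLC, CP and DMA engines, save the MC state, pulse the
 * GRBM/SRBM soft reset bits for the requested blocks, and then
 * restore the MC (SI).
 */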
3853 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3854 {
3855 	struct evergreen_mc_save save;
3856 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3857 	u32 tmp;
3858 
3859 	if (reset_mask == 0)
3860 		return;
3861 
3862 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3863 
3864 	evergreen_print_gpu_status_regs(rdev);
3865 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3866 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3867 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3868 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3869 
3870 	/* disable PG/CG */
3871 	si_fini_pg(rdev);
3872 	si_fini_cg(rdev);
3873 
3874 	/* stop the rlc */
3875 	si_rlc_stop(rdev);
3876 
3877 	/* Disable CP parsing/prefetching */
3878 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3879 
3880 	if (reset_mask & RADEON_RESET_DMA) {
3881 		/* dma0 */
3882 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3883 		tmp &= ~DMA_RB_ENABLE;
3884 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3885 	}
3886 	if (reset_mask & RADEON_RESET_DMA1) {
3887 		/* dma1 */
3888 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3889 		tmp &= ~DMA_RB_ENABLE;
3890 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3891 	}
3892 
3893 	udelay(50);
3894 
3895 	evergreen_mc_stop(rdev, &save);
3896 	if (evergreen_mc_wait_for_idle(rdev)) {
3897 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3898 	}
3899 
3900 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3901 		grbm_soft_reset = SOFT_RESET_CB |
3902 			SOFT_RESET_DB |
3903 			SOFT_RESET_GDS |
3904 			SOFT_RESET_PA |
3905 			SOFT_RESET_SC |
3906 			SOFT_RESET_BCI |
3907 			SOFT_RESET_SPI |
3908 			SOFT_RESET_SX |
3909 			SOFT_RESET_TC |
3910 			SOFT_RESET_TA |
3911 			SOFT_RESET_VGT |
3912 			SOFT_RESET_IA;
3913 	}
3914 
3915 	if (reset_mask & RADEON_RESET_CP) {
3916 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3917 
3918 		srbm_soft_reset |= SOFT_RESET_GRBM;
3919 	}
3920 
3921 	if (reset_mask & RADEON_RESET_DMA)
3922 		srbm_soft_reset |= SOFT_RESET_DMA;
3923 
3924 	if (reset_mask & RADEON_RESET_DMA1)
3925 		srbm_soft_reset |= SOFT_RESET_DMA1;
3926 
3927 	if (reset_mask & RADEON_RESET_DISPLAY)
3928 		srbm_soft_reset |= SOFT_RESET_DC;
3929 
3930 	if (reset_mask & RADEON_RESET_RLC)
3931 		grbm_soft_reset |= SOFT_RESET_RLC;
3932 
3933 	if (reset_mask & RADEON_RESET_SEM)
3934 		srbm_soft_reset |= SOFT_RESET_SEM;
3935 
3936 	if (reset_mask & RADEON_RESET_IH)
3937 		srbm_soft_reset |= SOFT_RESET_IH;
3938 
3939 	if (reset_mask & RADEON_RESET_GRBM)
3940 		srbm_soft_reset |= SOFT_RESET_GRBM;
3941 
3942 	if (reset_mask & RADEON_RESET_VMC)
3943 		srbm_soft_reset |= SOFT_RESET_VMC;
3944 
3945 	if (reset_mask & RADEON_RESET_MC)
3946 		srbm_soft_reset |= SOFT_RESET_MC;
3947 
3948 	if (grbm_soft_reset) {
3949 		tmp = RREG32(GRBM_SOFT_RESET);
3950 		tmp |= grbm_soft_reset;
3951 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3952 		WREG32(GRBM_SOFT_RESET, tmp);
3953 		tmp = RREG32(GRBM_SOFT_RESET);
3954 
3955 		udelay(50);
3956 
3957 		tmp &= ~grbm_soft_reset;
3958 		WREG32(GRBM_SOFT_RESET, tmp);
3959 		tmp = RREG32(GRBM_SOFT_RESET);
3960 	}
3961 
3962 	if (srbm_soft_reset) {
3963 		tmp = RREG32(SRBM_SOFT_RESET);
3964 		tmp |= srbm_soft_reset;
3965 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3966 		WREG32(SRBM_SOFT_RESET, tmp);
3967 		tmp = RREG32(SRBM_SOFT_RESET);
3968 
3969 		udelay(50);
3970 
3971 		tmp &= ~srbm_soft_reset;
3972 		WREG32(SRBM_SOFT_RESET, tmp);
3973 		tmp = RREG32(SRBM_SOFT_RESET);
3974 	}
3975 
3976 	/* Wait a little for things to settle down */
3977 	udelay(50);
3978 
3979 	evergreen_mc_resume(rdev, &save);
3980 	udelay(50);
3981 
3982 	evergreen_print_gpu_status_regs(rdev);
3983 }
3984 
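/**
 * si_set_clk_bypass_mode - switch sclk/mclk to bypass mode
 *
 * @rdev: radeon_device pointer
 *
 * Force the system and memory clocks onto their bypass sources
 * in preparation for a pci config reset (SI).
 */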
3985 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3986 {
3987 	u32 tmp, i;
3988 
3989 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3990 	tmp |= SPLL_BYPASS_EN;
3991 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3992 
3993 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3994 	tmp |= SPLL_CTLREQ_CHG;
3995 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3996 
3997 	for (i = 0; i < rdev->usec_timeout; i++) {
3998 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3999 			break;
4000 		udelay(1);
4001 	}
4002 
4003 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4004 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
4005 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4006 
4007 	tmp = RREG32(MPLL_CNTL_MODE);
4008 	tmp &= ~MPLL_MCLK_SEL;
4009 	WREG32(MPLL_CNTL_MODE, tmp);
4010 }
4011 
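/**
 * si_spll_powerdown - power down the SPLL
 *
 * @rdev: radeon_device pointer
 *
 * Take software control of the SPLL, put it into reset and
 * sleep, then hand control back to the hardware (SI).
 */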
4012 static void si_spll_powerdown(struct radeon_device *rdev)
4013 {
4014 	u32 tmp;
4015 
4016 	tmp = RREG32(SPLL_CNTL_MODE);
4017 	tmp |= SPLL_SW_DIR_CONTROL;
4018 	WREG32(SPLL_CNTL_MODE, tmp);
4019 
4020 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4021 	tmp |= SPLL_RESET;
4022 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4023 
4024 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4025 	tmp |= SPLL_SLEEP;
4026 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4027 
4028 	tmp = RREG32(SPLL_CNTL_MODE);
4029 	tmp &= ~SPLL_SW_DIR_CONTROL;
4030 	WREG32(SPLL_CNTL_MODE, tmp);
4031 }
4032 
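/**
 * si_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Quiesce the CP, DMA and RLC engines, stop memory access,
 * switch the clocks to bypass, power down the SPLL, then reset
 * the asic through the PCI config space and wait for it to come
 * back (SI).
 */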
4033 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4034 {
4035 	struct evergreen_mc_save save;
4036 	u32 tmp, i;
4037 
4038 	dev_info(rdev->dev, "GPU pci config reset\n");
4039 
4040 	/* disable dpm? */
4041 
4042 	/* disable cg/pg */
4043 	si_fini_pg(rdev);
4044 	si_fini_cg(rdev);
4045 
4046 	/* Disable CP parsing/prefetching */
4047 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4048 	/* dma0 */
4049 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4050 	tmp &= ~DMA_RB_ENABLE;
4051 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4052 	/* dma1 */
4053 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4054 	tmp &= ~DMA_RB_ENABLE;
4055 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4056 	/* XXX other engines? */
4057 
4058 	/* halt the rlc, disable cp internal ints */
4059 	si_rlc_stop(rdev);
4060 
4061 	udelay(50);
4062 
4063 	/* disable mem access */
4064 	evergreen_mc_stop(rdev, &save);
4065 	if (evergreen_mc_wait_for_idle(rdev)) {
4066 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4067 	}
4068 
4069 	/* set mclk/sclk to bypass */
4070 	si_set_clk_bypass_mode(rdev);
4071 	/* powerdown spll */
4072 	si_spll_powerdown(rdev);
4073 	/* disable BM */
4074 	pci_clear_master(rdev->pdev);
4075 	/* reset */
4076 	radeon_pci_config_reset(rdev);
4077 	/* wait for asic to come out of reset */
4078 	for (i = 0; i < rdev->usec_timeout; i++) {
4079 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4080 			break;
4081 		udelay(1);
4082 	}
4083 }
4084 
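/**
 * si_asic_reset - reset the asic
 *
 * @rdev: radeon_device pointer
 * @hard: force a full pci config reset
 *
 * Try a soft reset of the hung blocks first; if blocks are still
 * busy afterwards and hard resets are allowed, fall back to a
 * pci config reset (SI).
 * Returns 0.
 */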
4085 int si_asic_reset(struct radeon_device *rdev, bool hard)
4086 {
4087 	u32 reset_mask;
4088 
4089 	if (hard) {
4090 		si_gpu_pci_config_reset(rdev);
4091 		return 0;
4092 	}
4093 
4094 	reset_mask = si_gpu_check_soft_reset(rdev);
4095 
4096 	if (reset_mask)
4097 		r600_set_bios_scratch_engine_hung(rdev, true);
4098 
4099 	/* try soft reset */
4100 	si_gpu_soft_reset(rdev, reset_mask);
4101 
4102 	reset_mask = si_gpu_check_soft_reset(rdev);
4103 
4104 	/* try pci config reset */
4105 	if (reset_mask && radeon_hard_reset)
4106 		si_gpu_pci_config_reset(rdev);
4107 
4108 	reset_mask = si_gpu_check_soft_reset(rdev);
4109 
4110 	if (!reset_mask)
4111 		r600_set_bios_scratch_engine_hung(rdev, false);
4112 
4113 	return 0;
4114 }
4115 
4116 /**
4117  * si_gfx_is_lockup - Check if the GFX engine is locked up
4118  *
4119  * @rdev: radeon_device pointer
4120  * @ring: radeon_ring structure holding ring information
4121  *
4122  * Check if the GFX engine is locked up.
4123  * Returns true if the engine appears to be locked up, false if not.
4124  */
4125 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4126 {
4127 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4128 
4129 	if (!(reset_mask & (RADEON_RESET_GFX |
4130 			    RADEON_RESET_COMPUTE |
4131 			    RADEON_RESET_CP))) {
4132 		radeon_ring_lockup_update(rdev, ring);
4133 		return false;
4134 	}
4135 	return radeon_ring_test_lockup(rdev, ring);
4136 }
4137 
4138 /* MC */
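/**
 * si_mc_program - program the memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set up the system aperture, VRAM and AGP locations in the MC
 * while memory clients are stopped, and disable the VGA renderer
 * so it cannot scribble over our objects in VRAM (SI).
 */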
4139 static void si_mc_program(struct radeon_device *rdev)
4140 {
4141 	struct evergreen_mc_save save;
4142 	u32 tmp;
4143 	int i, j;
4144 
4145 	/* Initialize HDP */
4146 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4147 		WREG32((0x2c14 + j), 0x00000000);
4148 		WREG32((0x2c18 + j), 0x00000000);
4149 		WREG32((0x2c1c + j), 0x00000000);
4150 		WREG32((0x2c20 + j), 0x00000000);
4151 		WREG32((0x2c24 + j), 0x00000000);
4152 	}
4153 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4154 
4155 	evergreen_mc_stop(rdev, &save);
4156 	if (radeon_mc_wait_for_idle(rdev)) {
4157 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4158 	}
4159 	if (!ASIC_IS_NODCE(rdev))
4160 		/* Lockout access through VGA aperture */
4161 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4162 	/* Update configuration */
4163 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4164 	       rdev->mc.vram_start >> 12);
4165 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4166 	       rdev->mc.vram_end >> 12);
4167 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4168 	       rdev->vram_scratch.gpu_addr >> 12);
4169 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4170 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4171 	WREG32(MC_VM_FB_LOCATION, tmp);
4172 	/* XXX double check these! */
4173 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4174 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4175 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4176 	WREG32(MC_VM_AGP_BASE, 0);
4177 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4178 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4179 	if (radeon_mc_wait_for_idle(rdev)) {
4180 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4181 	}
4182 	evergreen_mc_resume(rdev, &save);
4183 	if (!ASIC_IS_NODCE(rdev)) {
4184 		/* we need to own VRAM, so turn off the VGA renderer here
4185 		 * to stop it overwriting our objects */
4186 		rv515_vga_render_disable(rdev);
4187 	}
4188 }
4189 
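/**
 * si_vram_gtt_location - determine the VRAM and GTT placement
 *
 * @rdev: radeon_device pointer
 * @mc: memory controller structure holding memory information
 *
 * Clamp oversized VRAM so that GTT space remains, then place the
 * VRAM and GTT apertures in the GPU address space (SI).
 */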
4190 void si_vram_gtt_location(struct radeon_device *rdev,
4191 			  struct radeon_mc *mc)
4192 {
4193 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4194 		/* leave room for at least 1024M GTT */
4195 		dev_warn(rdev->dev, "limiting VRAM\n");
4196 		mc->real_vram_size = 0xFFC0000000ULL;
4197 		mc->mc_vram_size = 0xFFC0000000ULL;
4198 	}
4199 	radeon_vram_location(rdev, &rdev->mc, 0);
4200 	rdev->mc.gtt_base_align = 0;
4201 	radeon_gtt_location(rdev, mc);
4202 }
4203 
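/**
 * si_mc_init - initialize the memory controller state
 *
 * @rdev: radeon_device pointer
 *
 * Determine the VRAM channel size, channel count and total size,
 * then set up the VRAM and GTT locations and update the
 * bandwidth info (SI).
 * Returns 0.
 */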
4204 static int si_mc_init(struct radeon_device *rdev)
4205 {
4206 	u32 tmp;
4207 	int chansize, numchan;
4208 
4209 	/* Get VRAM information */
4210 	rdev->mc.vram_is_ddr = true;
4211 	tmp = RREG32(MC_ARB_RAMCFG);
4212 	if (tmp & CHANSIZE_OVERRIDE) {
4213 		chansize = 16;
4214 	} else if (tmp & CHANSIZE_MASK) {
4215 		chansize = 64;
4216 	} else {
4217 		chansize = 32;
4218 	}
4219 	tmp = RREG32(MC_SHARED_CHMAP);
4220 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4221 	case 0:
4222 	default:
4223 		numchan = 1;
4224 		break;
4225 	case 1:
4226 		numchan = 2;
4227 		break;
4228 	case 2:
4229 		numchan = 4;
4230 		break;
4231 	case 3:
4232 		numchan = 8;
4233 		break;
4234 	case 4:
4235 		numchan = 3;
4236 		break;
4237 	case 5:
4238 		numchan = 6;
4239 		break;
4240 	case 6:
4241 		numchan = 10;
4242 		break;
4243 	case 7:
4244 		numchan = 12;
4245 		break;
4246 	case 8:
4247 		numchan = 16;
4248 		break;
4249 	}
4250 	rdev->mc.vram_width = numchan * chansize;
4251 	/* Could the aperture size report 0? */
4252 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4253 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4254 	/* size in MB on si */
4255 	tmp = RREG32(CONFIG_MEMSIZE);
4256 	/* some boards may have garbage in the upper 16 bits */
4257 	if (tmp & 0xffff0000) {
4258 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4259 		if (tmp & 0xffff)
4260 			tmp &= 0xffff;
4261 	}
4262 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4263 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4264 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4265 	si_vram_gtt_location(rdev, &rdev->mc);
4266 	radeon_update_bandwidth_info(rdev);
4267 
4268 	return 0;
4269 }
4270 
4271 /*
4272  * GART
4273  */
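/**
 * si_pcie_gart_tlb_flush - flush the VM TLB for context 0
 *
 * @rdev: radeon_device pointer
 *
 * Flush the HDP cache and request a TLB invalidate for VM
 * context 0, which backs the GTT (SI).
 */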
4274 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4275 {
4276 	/* flush hdp cache */
4277 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4278 
4279 	/* bits 0-15 are the VM contexts0-15 */
4280 	WREG32(VM_INVALIDATE_REQUEST, 1);
4281 }
4282 
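/**
 * si_pcie_gart_enable - set up and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pin the page table in VRAM, program the TLB and L2 cache
 * control, set up VM context 0 for the GTT and contexts 1-15
 * for per-process VMs, then flush the TLBs (SI).
 * Returns 0 on success, error on failure.
 */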
4283 static int si_pcie_gart_enable(struct radeon_device *rdev)
4284 {
4285 	int r, i;
4286 
4287 	if (rdev->gart.robj == NULL) {
4288 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4289 		return -EINVAL;
4290 	}
4291 	r = radeon_gart_table_vram_pin(rdev);
4292 	if (r)
4293 		return r;
4294 	/* Setup TLB control */
4295 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4296 	       (0xA << 7) |
4297 	       ENABLE_L1_TLB |
4298 	       ENABLE_L1_FRAGMENT_PROCESSING |
4299 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4300 	       ENABLE_ADVANCED_DRIVER_MODEL |
4301 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4302 	/* Setup L2 cache */
4303 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4304 	       ENABLE_L2_FRAGMENT_PROCESSING |
4305 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4306 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4307 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4308 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4309 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4310 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4311 	       BANK_SELECT(4) |
4312 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4313 	/* setup context0 */
4314 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4315 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4316 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4317 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4318 			(u32)(rdev->dummy_page.addr >> 12));
4319 	WREG32(VM_CONTEXT0_CNTL2, 0);
4320 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4321 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4322 
4323 	WREG32(0x15D4, 0);
4324 	WREG32(0x15D8, 0);
4325 	WREG32(0x15DC, 0);
4326 
4327 	/* empty context1-15 */
4328 	/* set vm size, must be a multiple of 4 */
4329 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4330 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4331 	/* Assign the pt base to something valid for now; the pts used for
4332 	 * the VMs are determined by the application, set up, and assigned
4333 	 * on the fly in the vm part of radeon_gart.c
4334 	 */
4335 	for (i = 1; i < 16; i++) {
4336 		if (i < 8)
4337 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4338 			       rdev->vm_manager.saved_table_addr[i]);
4339 		else
4340 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4341 			       rdev->vm_manager.saved_table_addr[i]);
4342 	}
4343 
4344 	/* enable context1-15 */
4345 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4346 	       (u32)(rdev->dummy_page.addr >> 12));
4347 	WREG32(VM_CONTEXT1_CNTL2, 4);
4348 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4349 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4350 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4351 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4352 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4353 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4354 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4355 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4356 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4357 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4358 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4359 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4360 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4361 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4362 
4363 	si_pcie_gart_tlb_flush(rdev);
4364 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4365 		 (unsigned)(rdev->mc.gtt_size >> 20),
4366 		 (unsigned long long)rdev->gart.table_addr);
4367 	rdev->gart.ready = true;
4368 	return 0;
4369 }
4370 
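/**
 * si_pcie_gart_disable - shut down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Save the per-VM page table addresses, disable all VM contexts
 * and the L1 TLB, and unpin the page table (SI).
 */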
4371 static void si_pcie_gart_disable(struct radeon_device *rdev)
4372 {
4373 	unsigned i;
4374 
4375 	for (i = 1; i < 16; ++i) {
4376 		uint32_t reg;
4377 		if (i < 8)
4378 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4379 		else
4380 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4381 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4382 	}
4383 
4384 	/* Disable all tables */
4385 	WREG32(VM_CONTEXT0_CNTL, 0);
4386 	WREG32(VM_CONTEXT1_CNTL, 0);
4387 	/* Setup TLB control */
4388 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4389 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4390 	/* Setup L2 cache */
4391 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4392 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4393 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4394 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4395 	WREG32(VM_L2_CNTL2, 0);
4396 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4397 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4398 	radeon_gart_table_vram_unpin(rdev);
4399 }
4400 
4401 static void si_pcie_gart_fini(struct radeon_device *rdev)
4402 {
4403 	si_pcie_gart_disable(rdev);
4404 	radeon_gart_table_vram_free(rdev);
4405 	radeon_gart_fini(rdev);
4406 }
4407 
4408 /* vm parser */
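/**
 * si_vm_reg_valid - check whether a register may be written from a VM IB
 *
 * @reg: register offset to check
 *
 * Context and shader registers are always allowed; config
 * registers are checked against a whitelist (SI).
 * Returns true if the register is safe to write, false otherwise.
 */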
4409 static bool si_vm_reg_valid(u32 reg)
4410 {
4411 	/* context regs are fine */
4412 	if (reg >= 0x28000)
4413 		return true;
4414 
4415 	/* shader regs are also fine */
4416 	if (reg >= 0xB000 && reg < 0xC000)
4417 		return true;
4418 
4419 	/* check config regs */
4420 	switch (reg) {
4421 	case GRBM_GFX_INDEX:
4422 	case CP_STRMOUT_CNTL:
4423 	case VGT_VTX_VECT_EJECT_REG:
4424 	case VGT_CACHE_INVALIDATION:
4425 	case VGT_ESGS_RING_SIZE:
4426 	case VGT_GSVS_RING_SIZE:
4427 	case VGT_GS_VERTEX_REUSE:
4428 	case VGT_PRIMITIVE_TYPE:
4429 	case VGT_INDEX_TYPE:
4430 	case VGT_NUM_INDICES:
4431 	case VGT_NUM_INSTANCES:
4432 	case VGT_TF_RING_SIZE:
4433 	case VGT_HS_OFFCHIP_PARAM:
4434 	case VGT_TF_MEMORY_BASE:
4435 	case PA_CL_ENHANCE:
4436 	case PA_SU_LINE_STIPPLE_VALUE:
4437 	case PA_SC_LINE_STIPPLE_STATE:
4438 	case PA_SC_ENHANCE:
4439 	case SQC_CACHES:
4440 	case SPI_STATIC_THREAD_MGMT_1:
4441 	case SPI_STATIC_THREAD_MGMT_2:
4442 	case SPI_STATIC_THREAD_MGMT_3:
4443 	case SPI_PS_MAX_WAVE_ID:
4444 	case SPI_CONFIG_CNTL:
4445 	case SPI_CONFIG_CNTL_1:
4446 	case TA_CNTL_AUX:
4447 	case TA_CS_BC_BASE_ADDR:
4448 		return true;
4449 	default:
4450 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4451 		return false;
4452 	}
4453 }
4454 
4455 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4456 				  u32 *ib, struct radeon_cs_packet *pkt)
4457 {
4458 	switch (pkt->opcode) {
4459 	case PACKET3_NOP:
4460 	case PACKET3_SET_BASE:
4461 	case PACKET3_SET_CE_DE_COUNTERS:
4462 	case PACKET3_LOAD_CONST_RAM:
4463 	case PACKET3_WRITE_CONST_RAM:
4464 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4465 	case PACKET3_DUMP_CONST_RAM:
4466 	case PACKET3_INCREMENT_CE_COUNTER:
4467 	case PACKET3_WAIT_ON_DE_COUNTER:
4468 	case PACKET3_CE_WRITE:
4469 		break;
4470 	default:
4471 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4472 		return -EINVAL;
4473 	}
4474 	return 0;
4475 }
4476 
4477 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4478 {
4479 	u32 start_reg, reg, i;
4480 	u32 command = ib[idx + 4];
4481 	u32 info = ib[idx + 1];
4482 	u32 idx_value = ib[idx];
4483 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4484 		/* src address space is register */
4485 		if (((info & 0x60000000) >> 29) == 0) {
4486 			start_reg = idx_value << 2;
4487 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4488 				reg = start_reg;
4489 				if (!si_vm_reg_valid(reg)) {
4490 					DRM_ERROR("CP DMA Bad SRC register\n");
4491 					return -EINVAL;
4492 				}
4493 			} else {
4494 				for (i = 0; i < (command & 0x1fffff); i++) {
4495 					reg = start_reg + (4 * i);
4496 					if (!si_vm_reg_valid(reg)) {
4497 						DRM_ERROR("CP DMA Bad SRC register\n");
4498 						return -EINVAL;
4499 					}
4500 				}
4501 			}
4502 		}
4503 	}
4504 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4505 		/* dst address space is register */
4506 		if (((info & 0x00300000) >> 20) == 0) {
4507 			start_reg = ib[idx + 2];
4508 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4509 				reg = start_reg;
4510 				if (!si_vm_reg_valid(reg)) {
4511 					DRM_ERROR("CP DMA Bad DST register\n");
4512 					return -EINVAL;
4513 				}
4514 			} else {
4515 				for (i = 0; i < (command & 0x1fffff); i++) {
4516 					reg = start_reg + (4 * i);
4517 					if (!si_vm_reg_valid(reg)) {
4518 						DRM_ERROR("CP DMA Bad DST register\n");
4519 						return -EINVAL;
4520 					}
4521 				}
4522 			}
4523 		}
4524 	}
4525 	return 0;
4526 }
4527 
4528 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4529 				   u32 *ib, struct radeon_cs_packet *pkt)
4530 {
4531 	int r;
4532 	u32 idx = pkt->idx + 1;
4533 	u32 idx_value = ib[idx];
4534 	u32 start_reg, end_reg, reg, i;
4535 
4536 	switch (pkt->opcode) {
4537 	case PACKET3_NOP:
4538 	case PACKET3_SET_BASE:
4539 	case PACKET3_CLEAR_STATE:
4540 	case PACKET3_INDEX_BUFFER_SIZE:
4541 	case PACKET3_DISPATCH_DIRECT:
4542 	case PACKET3_DISPATCH_INDIRECT:
4543 	case PACKET3_ALLOC_GDS:
4544 	case PACKET3_WRITE_GDS_RAM:
4545 	case PACKET3_ATOMIC_GDS:
4546 	case PACKET3_ATOMIC:
4547 	case PACKET3_OCCLUSION_QUERY:
4548 	case PACKET3_SET_PREDICATION:
4549 	case PACKET3_COND_EXEC:
4550 	case PACKET3_PRED_EXEC:
4551 	case PACKET3_DRAW_INDIRECT:
4552 	case PACKET3_DRAW_INDEX_INDIRECT:
4553 	case PACKET3_INDEX_BASE:
4554 	case PACKET3_DRAW_INDEX_2:
4555 	case PACKET3_CONTEXT_CONTROL:
4556 	case PACKET3_INDEX_TYPE:
4557 	case PACKET3_DRAW_INDIRECT_MULTI:
4558 	case PACKET3_DRAW_INDEX_AUTO:
4559 	case PACKET3_DRAW_INDEX_IMMD:
4560 	case PACKET3_NUM_INSTANCES:
4561 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4562 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4563 	case PACKET3_DRAW_INDEX_OFFSET_2:
4564 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4565 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4566 	case PACKET3_MPEG_INDEX:
4567 	case PACKET3_WAIT_REG_MEM:
4568 	case PACKET3_MEM_WRITE:
4569 	case PACKET3_PFP_SYNC_ME:
4570 	case PACKET3_SURFACE_SYNC:
4571 	case PACKET3_EVENT_WRITE:
4572 	case PACKET3_EVENT_WRITE_EOP:
4573 	case PACKET3_EVENT_WRITE_EOS:
4574 	case PACKET3_SET_CONTEXT_REG:
4575 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4576 	case PACKET3_SET_SH_REG:
4577 	case PACKET3_SET_SH_REG_OFFSET:
4578 	case PACKET3_INCREMENT_DE_COUNTER:
4579 	case PACKET3_WAIT_ON_CE_COUNTER:
4580 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4581 	case PACKET3_ME_WRITE:
4582 		break;
4583 	case PACKET3_COPY_DATA:
4584 		if ((idx_value & 0xf00) == 0) {
4585 			reg = ib[idx + 3] * 4;
4586 			if (!si_vm_reg_valid(reg))
4587 				return -EINVAL;
4588 		}
4589 		break;
4590 	case PACKET3_WRITE_DATA:
4591 		if ((idx_value & 0xf00) == 0) {
4592 			start_reg = ib[idx + 1] * 4;
4593 			if (idx_value & 0x10000) {
4594 				if (!si_vm_reg_valid(start_reg))
4595 					return -EINVAL;
4596 			} else {
4597 				for (i = 0; i < (pkt->count - 2); i++) {
4598 					reg = start_reg + (4 * i);
4599 					if (!si_vm_reg_valid(reg))
4600 						return -EINVAL;
4601 				}
4602 			}
4603 		}
4604 		break;
4605 	case PACKET3_COND_WRITE:
4606 		if (idx_value & 0x100) {
4607 			reg = ib[idx + 5] * 4;
4608 			if (!si_vm_reg_valid(reg))
4609 				return -EINVAL;
4610 		}
4611 		break;
4612 	case PACKET3_COPY_DW:
4613 		if (idx_value & 0x2) {
4614 			reg = ib[idx + 3] * 4;
4615 			if (!si_vm_reg_valid(reg))
4616 				return -EINVAL;
4617 		}
4618 		break;
4619 	case PACKET3_SET_CONFIG_REG:
4620 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4621 		end_reg = 4 * pkt->count + start_reg - 4;
4622 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4623 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4624 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4625 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4626 			return -EINVAL;
4627 		}
4628 		for (i = 0; i < pkt->count; i++) {
4629 			reg = start_reg + (4 * i);
4630 			if (!si_vm_reg_valid(reg))
4631 				return -EINVAL;
4632 		}
4633 		break;
4634 	case PACKET3_CP_DMA:
4635 		r = si_vm_packet3_cp_dma_check(ib, idx);
4636 		if (r)
4637 			return r;
4638 		break;
4639 	default:
4640 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4641 		return -EINVAL;
4642 	}
4643 	return 0;
4644 }
4645 
4646 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4647 				       u32 *ib, struct radeon_cs_packet *pkt)
4648 {
4649 	int r;
4650 	u32 idx = pkt->idx + 1;
4651 	u32 idx_value = ib[idx];
4652 	u32 start_reg, reg, i;
4653 
4654 	switch (pkt->opcode) {
4655 	case PACKET3_NOP:
4656 	case PACKET3_SET_BASE:
4657 	case PACKET3_CLEAR_STATE:
4658 	case PACKET3_DISPATCH_DIRECT:
4659 	case PACKET3_DISPATCH_INDIRECT:
4660 	case PACKET3_ALLOC_GDS:
4661 	case PACKET3_WRITE_GDS_RAM:
4662 	case PACKET3_ATOMIC_GDS:
4663 	case PACKET3_ATOMIC:
4664 	case PACKET3_OCCLUSION_QUERY:
4665 	case PACKET3_SET_PREDICATION:
4666 	case PACKET3_COND_EXEC:
4667 	case PACKET3_PRED_EXEC:
4668 	case PACKET3_CONTEXT_CONTROL:
4669 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4670 	case PACKET3_WAIT_REG_MEM:
4671 	case PACKET3_MEM_WRITE:
4672 	case PACKET3_PFP_SYNC_ME:
4673 	case PACKET3_SURFACE_SYNC:
4674 	case PACKET3_EVENT_WRITE:
4675 	case PACKET3_EVENT_WRITE_EOP:
4676 	case PACKET3_EVENT_WRITE_EOS:
4677 	case PACKET3_SET_CONTEXT_REG:
4678 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4679 	case PACKET3_SET_SH_REG:
4680 	case PACKET3_SET_SH_REG_OFFSET:
4681 	case PACKET3_INCREMENT_DE_COUNTER:
4682 	case PACKET3_WAIT_ON_CE_COUNTER:
4683 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4684 	case PACKET3_ME_WRITE:
4685 		break;
4686 	case PACKET3_COPY_DATA:
4687 		if ((idx_value & 0xf00) == 0) {
4688 			reg = ib[idx + 3] * 4;
4689 			if (!si_vm_reg_valid(reg))
4690 				return -EINVAL;
4691 		}
4692 		break;
4693 	case PACKET3_WRITE_DATA:
4694 		if ((idx_value & 0xf00) == 0) {
4695 			start_reg = ib[idx + 1] * 4;
4696 			if (idx_value & 0x10000) {
4697 				if (!si_vm_reg_valid(start_reg))
4698 					return -EINVAL;
4699 			} else {
4700 				for (i = 0; i < (pkt->count - 2); i++) {
4701 					reg = start_reg + (4 * i);
4702 					if (!si_vm_reg_valid(reg))
4703 						return -EINVAL;
4704 				}
4705 			}
4706 		}
4707 		break;
4708 	case PACKET3_COND_WRITE:
4709 		if (idx_value & 0x100) {
4710 			reg = ib[idx + 5] * 4;
4711 			if (!si_vm_reg_valid(reg))
4712 				return -EINVAL;
4713 		}
4714 		break;
4715 	case PACKET3_COPY_DW:
4716 		if (idx_value & 0x2) {
4717 			reg = ib[idx + 3] * 4;
4718 			if (!si_vm_reg_valid(reg))
4719 				return -EINVAL;
4720 		}
4721 		break;
4722 	case PACKET3_CP_DMA:
4723 		r = si_vm_packet3_cp_dma_check(ib, idx);
4724 		if (r)
4725 			return r;
4726 		break;
4727 	default:
4728 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4729 		return -EINVAL;
4730 	}
4731 	return 0;
4732 }
4733 
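/**
 * si_ib_parse - validate the packets of a VM IB
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Walk the IB and check each PM4 packet against the CE, gfx or
 * compute rules for the ring it was submitted to, dumping the IB
 * on the first invalid packet (SI).
 * Returns 0 if the IB is valid, -EINVAL otherwise.
 */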
4734 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4735 {
4736 	int ret = 0;
4737 	u32 idx = 0, i;
4738 	struct radeon_cs_packet pkt;
4739 
4740 	do {
4741 		pkt.idx = idx;
4742 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4743 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4744 		pkt.one_reg_wr = 0;
4745 		switch (pkt.type) {
4746 		case RADEON_PACKET_TYPE0:
4747 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4748 			ret = -EINVAL;
4749 			break;
4750 		case RADEON_PACKET_TYPE2:
4751 			idx += 1;
4752 			break;
4753 		case RADEON_PACKET_TYPE3:
4754 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4755 			if (ib->is_const_ib)
4756 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4757 			else {
4758 				switch (ib->ring) {
4759 				case RADEON_RING_TYPE_GFX_INDEX:
4760 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4761 					break;
4762 				case CAYMAN_RING_TYPE_CP1_INDEX:
4763 				case CAYMAN_RING_TYPE_CP2_INDEX:
4764 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4765 					break;
4766 				default:
4767 					dev_err(rdev->dev, "Non-PM4 ring %d!\n", ib->ring);
4768 					ret = -EINVAL;
4769 					break;
4770 				}
4771 			}
4772 			idx += pkt.count + 2;
4773 			break;
4774 		default:
4775 			dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
4776 			ret = -EINVAL;
4777 			break;
4778 		}
4779 		if (ret) {
4780 			for (i = 0; i < ib->length_dw; i++) {
4781 				if (i == idx)
4782 					printk("\t0x%08x <---\n", ib->ptr[i]);
4783 				else
4784 					printk("\t0x%08x\n", ib->ptr[i]);
4785 			}
4786 			break;
4787 		}
4788 	} while (idx < ib->length_dw);
4789 
4790 	return ret;
4791 }
4792 
4793 /*
4794  * vm
4795  */
4796 int si_vm_init(struct radeon_device *rdev)
4797 {
4798 	/* number of VMs */
4799 	rdev->vm_manager.nvm = 16;
4800 	/* base offset of vram pages */
4801 	rdev->vm_manager.vram_base_offset = 0;
4802 
4803 	return 0;
4804 }
4805 
4806 void si_vm_fini(struct radeon_device *rdev)
4807 {
4808 }
4809 
4810 /**
4811  * si_vm_decode_fault - print human readable fault info
4812  *
4813  * @rdev: radeon_device pointer
4814  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4815  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4816  *
4817  * Print human readable fault information (SI).
4818  */
4819 static void si_vm_decode_fault(struct radeon_device *rdev,
4820 			       u32 status, u32 addr)
4821 {
4822 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4823 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4824 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4825 	char *block;
4826 
4827 	if (rdev->family == CHIP_TAHITI) {
4828 		switch (mc_id) {
4829 		case 160:
4830 		case 144:
4831 		case 96:
4832 		case 80:
4833 		case 224:
4834 		case 208:
4835 		case 32:
4836 		case 16:
4837 			block = "CB";
4838 			break;
4839 		case 161:
4840 		case 145:
4841 		case 97:
4842 		case 81:
4843 		case 225:
4844 		case 209:
4845 		case 33:
4846 		case 17:
4847 			block = "CB_FMASK";
4848 			break;
4849 		case 162:
4850 		case 146:
4851 		case 98:
4852 		case 82:
4853 		case 226:
4854 		case 210:
4855 		case 34:
4856 		case 18:
4857 			block = "CB_CMASK";
4858 			break;
4859 		case 163:
4860 		case 147:
4861 		case 99:
4862 		case 83:
4863 		case 227:
4864 		case 211:
4865 		case 35:
4866 		case 19:
4867 			block = "CB_IMMED";
4868 			break;
4869 		case 164:
4870 		case 148:
4871 		case 100:
4872 		case 84:
4873 		case 228:
4874 		case 212:
4875 		case 36:
4876 		case 20:
4877 			block = "DB";
4878 			break;
4879 		case 165:
4880 		case 149:
4881 		case 101:
4882 		case 85:
4883 		case 229:
4884 		case 213:
4885 		case 37:
4886 		case 21:
4887 			block = "DB_HTILE";
4888 			break;
4889 		case 167:
4890 		case 151:
4891 		case 103:
4892 		case 87:
4893 		case 231:
4894 		case 215:
4895 		case 39:
4896 		case 23:
4897 			block = "DB_STEN";
4898 			break;
4899 		case 72:
4900 		case 68:
4901 		case 64:
4902 		case 8:
4903 		case 4:
4904 		case 0:
4905 		case 136:
4906 		case 132:
4907 		case 128:
4908 		case 200:
4909 		case 196:
4910 		case 192:
4911 			block = "TC";
4912 			break;
4913 		case 112:
4914 		case 48:
4915 			block = "CP";
4916 			break;
4917 		case 49:
4918 		case 177:
4919 		case 50:
4920 		case 178:
4921 			block = "SH";
4922 			break;
4923 		case 53:
4924 		case 190:
4925 			block = "VGT";
4926 			break;
4927 		case 117:
4928 			block = "IH";
4929 			break;
4930 		case 51:
4931 		case 115:
4932 			block = "RLC";
4933 			break;
4934 		case 119:
4935 		case 183:
4936 			block = "DMA0";
4937 			break;
4938 		case 61:
4939 			block = "DMA1";
4940 			break;
4941 		case 248:
4942 		case 120:
4943 			block = "HDP";
4944 			break;
4945 		default:
4946 			block = "unknown";
4947 			break;
4948 		}
4949 	} else {
4950 		switch (mc_id) {
4951 		case 32:
4952 		case 16:
4953 		case 96:
4954 		case 80:
4955 		case 160:
4956 		case 144:
4957 		case 224:
4958 		case 208:
4959 			block = "CB";
4960 			break;
4961 		case 33:
4962 		case 17:
4963 		case 97:
4964 		case 81:
4965 		case 161:
4966 		case 145:
4967 		case 225:
4968 		case 209:
4969 			block = "CB_FMASK";
4970 			break;
4971 		case 34:
4972 		case 18:
4973 		case 98:
4974 		case 82:
4975 		case 162:
4976 		case 146:
4977 		case 226:
4978 		case 210:
4979 			block = "CB_CMASK";
4980 			break;
4981 		case 35:
4982 		case 19:
4983 		case 99:
4984 		case 83:
4985 		case 163:
4986 		case 147:
4987 		case 227:
4988 		case 211:
4989 			block = "CB_IMMED";
4990 			break;
4991 		case 36:
4992 		case 20:
4993 		case 100:
4994 		case 84:
4995 		case 164:
4996 		case 148:
4997 		case 228:
4998 		case 212:
4999 			block = "DB";
5000 			break;
5001 		case 37:
5002 		case 21:
5003 		case 101:
5004 		case 85:
5005 		case 165:
5006 		case 149:
5007 		case 229:
5008 		case 213:
5009 			block = "DB_HTILE";
5010 			break;
5011 		case 39:
5012 		case 23:
5013 		case 103:
5014 		case 87:
5015 		case 167:
5016 		case 151:
5017 		case 231:
5018 		case 215:
5019 			block = "DB_STEN";
5020 			break;
5021 		case 72:
5022 		case 68:
5023 		case 8:
5024 		case 4:
5025 		case 136:
5026 		case 132:
5027 		case 200:
5028 		case 196:
5029 			block = "TC";
5030 			break;
5031 		case 112:
5032 		case 48:
5033 			block = "CP";
5034 			break;
5035 		case 49:
5036 		case 177:
5037 		case 50:
5038 		case 178:
5039 			block = "SH";
5040 			break;
5041 		case 53:
5042 			block = "VGT";
5043 			break;
5044 		case 117:
5045 			block = "IH";
5046 			break;
5047 		case 51:
5048 		case 115:
5049 			block = "RLC";
5050 			break;
5051 		case 119:
5052 		case 183:
5053 			block = "DMA0";
5054 			break;
5055 		case 61:
5056 			block = "DMA1";
5057 			break;
5058 		case 248:
5059 		case 120:
5060 			block = "HDP";
5061 			break;
5062 		default:
5063 			block = "unknown";
5064 			break;
5065 		}
5066 	}
5067 
5068 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5069 	       protections, vmid, addr,
5070 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5071 	       block, mc_id);
5072 }
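
/* Example with hypothetical values: a write fault by the CB block
 * (mc_id 16) in vmid 1 at page 4096 would be reported as:
 *
 *   VM fault (0x0c, vmid 1) at page 4096, write from CB (16)
 */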
5073 
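/* Update the page table base address for @vm_id, flush the HDP cache,
 * then request and wait for a TLB invalidate for that VM, all emitted
 * as packets on @ring.
 */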
5074 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5075 		 unsigned vm_id, uint64_t pd_addr)
5076 {
5077 	/* write new base address */
5078 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5079 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5080 				 WRITE_DATA_DST_SEL(0)));
5081 
5082 	if (vm_id < 8) {
5083 		radeon_ring_write(ring,
5084 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5085 	} else {
5086 		radeon_ring_write(ring,
5087 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5088 	}
5089 	radeon_ring_write(ring, 0);
5090 	radeon_ring_write(ring, pd_addr >> 12);
5091 
5092 	/* flush hdp cache */
5093 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5094 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5095 				 WRITE_DATA_DST_SEL(0)));
5096 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5097 	radeon_ring_write(ring, 0);
5098 	radeon_ring_write(ring, 0x1);
5099 
5100 	/* bits 0-15 are the VM contexts 0-15 */
5101 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5102 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5103 				 WRITE_DATA_DST_SEL(0)));
5104 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5105 	radeon_ring_write(ring, 0);
5106 	radeon_ring_write(ring, 1 << vm_id);
5107 
5108 	/* wait for the invalidate to complete */
5109 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5110 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5111 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5112 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5113 	radeon_ring_write(ring, 0);
5114 	radeon_ring_write(ring, 0); /* ref */
5115 	radeon_ring_write(ring, 0); /* mask */
5116 	radeon_ring_write(ring, 0x20); /* poll interval */
5117 
5118 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5119 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5120 	radeon_ring_write(ring, 0x0);
5121 }
5122 
5123 /*
5124  *  Power and clock gating
5125  */
5126 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5127 {
5128 	int i;
5129 
5130 	for (i = 0; i < rdev->usec_timeout; i++) {
5131 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5132 			break;
5133 		udelay(1);
5134 	}
5135 
5136 	for (i = 0; i < rdev->usec_timeout; i++) {
5137 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5138 			break;
5139 		udelay(1);
5140 	}
5141 }
5142 
5143 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5144 					 bool enable)
5145 {
5146 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5147 	u32 mask;
5148 	int i;
5149 
5150 	if (enable)
5151 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5152 	else
5153 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5154 	WREG32(CP_INT_CNTL_RING0, tmp);
5155 
5156 	if (!enable) {
5157 		/* read a gfx register */
5158 		tmp = RREG32(DB_DEPTH_INFO);
5159 
5160 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5161 		for (i = 0; i < rdev->usec_timeout; i++) {
5162 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5163 				break;
5164 			udelay(1);
5165 		}
5166 	}
5167 }
5168 
5169 static void si_set_uvd_dcm(struct radeon_device *rdev,
5170 			   bool sw_mode)
5171 {
5172 	u32 tmp, tmp2;
5173 
5174 	tmp = RREG32(UVD_CGC_CTRL);
5175 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5176 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5177 
5178 	if (sw_mode) {
5179 		tmp &= ~0x7ffff800;
5180 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5181 	} else {
5182 		tmp |= 0x7ffff800;
5183 		tmp2 = 0;
5184 	}
5185 
5186 	WREG32(UVD_CGC_CTRL, tmp);
5187 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5188 }
5189 
5190 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5191 {
5192 	bool hw_mode = true;
5193 
5194 	if (hw_mode) {
5195 		si_set_uvd_dcm(rdev, false);
5196 	} else {
5197 		u32 tmp = RREG32(UVD_CGC_CTRL);
5198 		tmp &= ~DCM;
5199 		WREG32(UVD_CGC_CTRL, tmp);
5200 	}
5201 }
5202 
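/* Halt the RLC and return the prior RLC_CNTL value so the caller can
 * restore it afterwards via si_update_rlc().
 */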
5203 static u32 si_halt_rlc(struct radeon_device *rdev)
5204 {
5205 	u32 data, orig;
5206 
5207 	orig = data = RREG32(RLC_CNTL);
5208 
5209 	if (data & RLC_ENABLE) {
5210 		data &= ~RLC_ENABLE;
5211 		WREG32(RLC_CNTL, data);
5212 
5213 		si_wait_for_rlc_serdes(rdev);
5214 	}
5215 
5216 	return orig;
5217 }
5218 
5219 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5220 {
5221 	u32 tmp;
5222 
5223 	tmp = RREG32(RLC_CNTL);
5224 	if (tmp != rlc)
5225 		WREG32(RLC_CNTL, rlc);
5226 }
5227 
5228 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5229 {
5230 	u32 data, orig;
5231 
5232 	orig = data = RREG32(DMA_PG);
5233 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5234 		data |= PG_CNTL_ENABLE;
5235 	else
5236 		data &= ~PG_CNTL_ENABLE;
5237 	if (orig != data)
5238 		WREG32(DMA_PG, data);
5239 }
5240 
5241 static void si_init_dma_pg(struct radeon_device *rdev)
5242 {
5243 	u32 tmp;
5244 
5245 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5246 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5247 
5248 	for (tmp = 0; tmp < 5; tmp++)
5249 		WREG32(DMA_PGFSM_WRITE, 0);
5250 }
5251 
5252 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5253 			       bool enable)
5254 {
5255 	u32 tmp;
5256 
5257 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5258 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5259 		WREG32(RLC_TTOP_D, tmp);
5260 
5261 		tmp = RREG32(RLC_PG_CNTL);
5262 		tmp |= GFX_PG_ENABLE;
5263 		WREG32(RLC_PG_CNTL, tmp);
5264 
5265 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5266 		tmp |= AUTO_PG_EN;
5267 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5268 	} else {
5269 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5270 		tmp &= ~AUTO_PG_EN;
5271 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5272 
5273 		tmp = RREG32(DB_RENDER_CONTROL);
5274 	}
5275 }
5276 
5277 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5278 {
5279 	u32 tmp;
5280 
5281 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5282 
5283 	tmp = RREG32(RLC_PG_CNTL);
5284 	tmp |= GFX_PG_SRC;
5285 	WREG32(RLC_PG_CNTL, tmp);
5286 
5287 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5288 
5289 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5290 
5291 	tmp &= ~GRBM_REG_SGIT_MASK;
5292 	tmp |= GRBM_REG_SGIT(0x700);
5293 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5294 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5295 }
5296 
5297 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5298 {
5299 	u32 mask = 0, tmp, tmp1;
5300 	int i;
5301 
5302 	si_select_se_sh(rdev, se, sh);
5303 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5304 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5305 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5306 
5307 	tmp &= 0xffff0000;
5308 
5309 	tmp |= tmp1;
5310 	tmp >>= 16;
5311 
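	/* build a mask of max_cu_per_sh ones, i.e. (1 << max_cu_per_sh) - 1 */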
5312 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
5313 		mask <<= 1;
5314 		mask |= 1;
5315 	}
5316 
5317 	return (~tmp) & mask;
5318 }
5319 
5320 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5321 {
5322 	u32 i, j, k, active_cu_number = 0;
5323 	u32 mask, counter, cu_bitmap;
5324 	u32 tmp = 0;
5325 
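	/* mark up to two active CUs per shader array as always-on (AO) */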
5326 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5327 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5328 			mask = 1;
5329 			cu_bitmap = 0;
5330 			counter = 0;
5331 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5332 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5333 					if (counter < 2)
5334 						cu_bitmap |= mask;
5335 					counter++;
5336 				}
5337 				mask <<= 1;
5338 			}
5339 
5340 			active_cu_number += counter;
5341 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5342 		}
5343 	}
5344 
5345 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5346 
5347 	tmp = RREG32(RLC_MAX_PG_CU);
5348 	tmp &= ~MAX_PU_CU_MASK;
5349 	tmp |= MAX_PU_CU(active_cu_number);
5350 	WREG32(RLC_MAX_PG_CU, tmp);
5351 }
5352 
5353 static void si_enable_cgcg(struct radeon_device *rdev,
5354 			   bool enable)
5355 {
5356 	u32 data, orig, tmp;
5357 
5358 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5359 
5360 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5361 		si_enable_gui_idle_interrupt(rdev, true);
5362 
5363 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5364 
5365 		tmp = si_halt_rlc(rdev);
5366 
5367 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5368 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5369 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5370 
5371 		si_wait_for_rlc_serdes(rdev);
5372 
5373 		si_update_rlc(rdev, tmp);
5374 
5375 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5376 
5377 		data |= CGCG_EN | CGLS_EN;
5378 	} else {
5379 		si_enable_gui_idle_interrupt(rdev, false);
5380 
5381 		RREG32(CB_CGTT_SCLK_CTRL);
5382 		RREG32(CB_CGTT_SCLK_CTRL);
5383 		RREG32(CB_CGTT_SCLK_CTRL);
5384 		RREG32(CB_CGTT_SCLK_CTRL);
5385 
5386 		data &= ~(CGCG_EN | CGLS_EN);
5387 	}
5388 
5389 	if (orig != data)
5390 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5391 }
5392 
5393 static void si_enable_mgcg(struct radeon_device *rdev,
5394 			   bool enable)
5395 {
5396 	u32 data, orig, tmp = 0;
5397 
5398 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5399 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5400 		data = 0x96940200;
5401 		if (orig != data)
5402 			WREG32(CGTS_SM_CTRL_REG, data);
5403 
5404 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5405 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5406 			data |= CP_MEM_LS_EN;
5407 			if (orig != data)
5408 				WREG32(CP_MEM_SLP_CNTL, data);
5409 		}
5410 
5411 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5412 		data &= 0xffffffc0;
5413 		if (orig != data)
5414 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5415 
5416 		tmp = si_halt_rlc(rdev);
5417 
5418 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5419 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5420 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5421 
5422 		si_update_rlc(rdev, tmp);
5423 	} else {
5424 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5425 		data |= 0x00000003;
5426 		if (orig != data)
5427 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5428 
5429 		data = RREG32(CP_MEM_SLP_CNTL);
5430 		if (data & CP_MEM_LS_EN) {
5431 			data &= ~CP_MEM_LS_EN;
5432 			WREG32(CP_MEM_SLP_CNTL, data);
5433 		}
5434 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5435 		data |= LS_OVERRIDE | OVERRIDE;
5436 		if (orig != data)
5437 			WREG32(CGTS_SM_CTRL_REG, data);
5438 
5439 		tmp = si_halt_rlc(rdev);
5440 
5441 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5442 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5443 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5444 
5445 		si_update_rlc(rdev, tmp);
5446 	}
5447 }
5448 
5449 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5450 			       bool enable)
5451 {
5452 	u32 orig, data, tmp;
5453 
5454 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5455 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5456 		tmp |= 0x3fff;
5457 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5458 
5459 		orig = data = RREG32(UVD_CGC_CTRL);
5460 		data |= DCM;
5461 		if (orig != data)
5462 			WREG32(UVD_CGC_CTRL, data);
5463 
5464 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5465 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5466 	} else {
5467 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5468 		tmp &= ~0x3fff;
5469 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5470 
5471 		orig = data = RREG32(UVD_CGC_CTRL);
5472 		data &= ~DCM;
5473 		if (orig != data)
5474 			WREG32(UVD_CGC_CTRL, data);
5475 
5476 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5477 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5478 	}
5479 }
5480 
5481 static const u32 mc_cg_registers[] =
5482 {
5483 	MC_HUB_MISC_HUB_CG,
5484 	MC_HUB_MISC_SIP_CG,
5485 	MC_HUB_MISC_VM_CG,
5486 	MC_XPB_CLK_GAT,
5487 	ATC_MISC_CG,
5488 	MC_CITF_MISC_WR_CG,
5489 	MC_CITF_MISC_RD_CG,
5490 	MC_CITF_MISC_VM_CG,
5491 	VM_L2_CG,
5492 };
5493 
5494 static void si_enable_mc_ls(struct radeon_device *rdev,
5495 			    bool enable)
5496 {
5497 	int i;
5498 	u32 orig, data;
5499 
5500 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5501 		orig = data = RREG32(mc_cg_registers[i]);
5502 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5503 			data |= MC_LS_ENABLE;
5504 		else
5505 			data &= ~MC_LS_ENABLE;
5506 		if (data != orig)
5507 			WREG32(mc_cg_registers[i], data);
5508 	}
5509 }
5510 
5511 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5512 			       bool enable)
5513 {
5514 	int i;
5515 	u32 orig, data;
5516 
5517 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5518 		orig = data = RREG32(mc_cg_registers[i]);
5519 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5520 			data |= MC_CG_ENABLE;
5521 		else
5522 			data &= ~MC_CG_ENABLE;
5523 		if (data != orig)
5524 			WREG32(mc_cg_registers[i], data);
5525 	}
5526 }
5527 
5528 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5529 			       bool enable)
5530 {
5531 	u32 orig, data, offset;
5532 	int i;
5533 
5534 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5535 		for (i = 0; i < 2; i++) {
5536 			if (i == 0)
5537 				offset = DMA0_REGISTER_OFFSET;
5538 			else
5539 				offset = DMA1_REGISTER_OFFSET;
5540 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5541 			data &= ~MEM_POWER_OVERRIDE;
5542 			if (data != orig)
5543 				WREG32(DMA_POWER_CNTL + offset, data);
5544 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5545 		}
5546 	} else {
5547 		for (i = 0; i < 2; i++) {
5548 			if (i == 0)
5549 				offset = DMA0_REGISTER_OFFSET;
5550 			else
5551 				offset = DMA1_REGISTER_OFFSET;
5552 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5553 			data |= MEM_POWER_OVERRIDE;
5554 			if (data != orig)
5555 				WREG32(DMA_POWER_CNTL + offset, data);
5556 
5557 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5558 			data = 0xff000000;
5559 			if (data != orig)
5560 				WREG32(DMA_CLK_CTRL + offset, data);
5561 		}
5562 	}
5563 }
5564 
5565 static void si_enable_bif_mgls(struct radeon_device *rdev,
5566 			       bool enable)
5567 {
5568 	u32 orig, data;
5569 
5570 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5571 
5572 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5573 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5574 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5575 	else
5576 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5577 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5578 
5579 	if (orig != data)
5580 		WREG32_PCIE(PCIE_CNTL2, data);
5581 }
5582 
5583 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5584 			       bool enable)
5585 {
5586 	u32 orig, data;
5587 
5588 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5589 
5590 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5591 		data &= ~CLOCK_GATING_DIS;
5592 	else
5593 		data |= CLOCK_GATING_DIS;
5594 
5595 	if (orig != data)
5596 		WREG32(HDP_HOST_PATH_CNTL, data);
5597 }
5598 
5599 static void si_enable_hdp_ls(struct radeon_device *rdev,
5600 			     bool enable)
5601 {
5602 	u32 orig, data;
5603 
5604 	orig = data = RREG32(HDP_MEM_POWER_LS);
5605 
5606 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5607 		data |= HDP_LS_ENABLE;
5608 	else
5609 		data &= ~HDP_LS_ENABLE;
5610 
5611 	if (orig != data)
5612 		WREG32(HDP_MEM_POWER_LS, data);
5613 }
5614 
5615 static void si_update_cg(struct radeon_device *rdev,
5616 			 u32 block, bool enable)
5617 {
5618 	if (block & RADEON_CG_BLOCK_GFX) {
5619 		si_enable_gui_idle_interrupt(rdev, false);
5620 		/* order matters: enable MGCG before CGCG; disable in the reverse order */
5621 		if (enable) {
5622 			si_enable_mgcg(rdev, true);
5623 			si_enable_cgcg(rdev, true);
5624 		} else {
5625 			si_enable_cgcg(rdev, false);
5626 			si_enable_mgcg(rdev, false);
5627 		}
5628 		si_enable_gui_idle_interrupt(rdev, true);
5629 	}
5630 
5631 	if (block & RADEON_CG_BLOCK_MC) {
5632 		si_enable_mc_mgcg(rdev, enable);
5633 		si_enable_mc_ls(rdev, enable);
5634 	}
5635 
5636 	if (block & RADEON_CG_BLOCK_SDMA) {
5637 		si_enable_dma_mgcg(rdev, enable);
5638 	}
5639 
5640 	if (block & RADEON_CG_BLOCK_BIF) {
5641 		si_enable_bif_mgls(rdev, enable);
5642 	}
5643 
5644 	if (block & RADEON_CG_BLOCK_UVD) {
5645 		if (rdev->has_uvd) {
5646 			si_enable_uvd_mgcg(rdev, enable);
5647 		}
5648 	}
5649 
5650 	if (block & RADEON_CG_BLOCK_HDP) {
5651 		si_enable_hdp_mgcg(rdev, enable);
5652 		si_enable_hdp_ls(rdev, enable);
5653 	}
5654 }
5655 
5656 static void si_init_cg(struct radeon_device *rdev)
5657 {
5658 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5659 			    RADEON_CG_BLOCK_MC |
5660 			    RADEON_CG_BLOCK_SDMA |
5661 			    RADEON_CG_BLOCK_BIF |
5662 			    RADEON_CG_BLOCK_HDP), true);
5663 	if (rdev->has_uvd) {
5664 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5665 		si_init_uvd_internal_cg(rdev);
5666 	}
5667 }
5668 
5669 static void si_fini_cg(struct radeon_device *rdev)
5670 {
5671 	if (rdev->has_uvd) {
5672 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5673 	}
5674 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5675 			    RADEON_CG_BLOCK_MC |
5676 			    RADEON_CG_BLOCK_SDMA |
5677 			    RADEON_CG_BLOCK_BIF |
5678 			    RADEON_CG_BLOCK_HDP), false);
5679 }
5680 
5681 u32 si_get_csb_size(struct radeon_device *rdev)
5682 {
5683 	u32 count = 0;
5684 	const struct cs_section_def *sect = NULL;
5685 	const struct cs_extent_def *ext = NULL;
5686 
5687 	if (rdev->rlc.cs_data == NULL)
5688 		return 0;
5689 
5690 	/* begin clear state */
5691 	count += 2;
5692 	/* context control state */
5693 	count += 3;
5694 
5695 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5696 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5697 			if (sect->id == SECT_CONTEXT)
5698 				count += 2 + ext->reg_count;
5699 			else
5700 				return 0;
5701 		}
5702 	}
5703 	/* pa_sc_raster_config */
5704 	count += 3;
5705 	/* end clear state */
5706 	count += 2;
5707 	/* clear state */
5708 	count += 2;
5709 
5710 	return count;
5711 }
5712 
5713 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5714 {
5715 	u32 count = 0, i;
5716 	const struct cs_section_def *sect = NULL;
5717 	const struct cs_extent_def *ext = NULL;
5718 
5719 	if (rdev->rlc.cs_data == NULL)
5720 		return;
5721 	if (buffer == NULL)
5722 		return;
5723 
5724 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5725 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5726 
5727 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5728 	buffer[count++] = cpu_to_le32(0x80000000);
5729 	buffer[count++] = cpu_to_le32(0x80000000);
5730 
5731 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5732 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5733 			if (sect->id == SECT_CONTEXT) {
5734 				buffer[count++] =
5735 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5736 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5737 				for (i = 0; i < ext->reg_count; i++)
5738 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5739 			} else {
5740 				return;
5741 			}
5742 		}
5743 	}
5744 
5745 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5746 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5747 	switch (rdev->family) {
5748 	case CHIP_TAHITI:
5749 	case CHIP_PITCAIRN:
5750 		buffer[count++] = cpu_to_le32(0x2a00126a);
5751 		break;
5752 	case CHIP_VERDE:
5753 		buffer[count++] = cpu_to_le32(0x0000124a);
5754 		break;
5755 	case CHIP_OLAND:
5756 		buffer[count++] = cpu_to_le32(0x00000082);
5757 		break;
5758 	case CHIP_HAINAN:
5759 		buffer[count++] = cpu_to_le32(0x00000000);
5760 		break;
5761 	default:
5762 		buffer[count++] = cpu_to_le32(0x00000000);
5763 		break;
5764 	}
5765 
5766 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5767 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5768 
5769 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5770 	buffer[count++] = cpu_to_le32(0);
5771 }
5772 
5773 static void si_init_pg(struct radeon_device *rdev)
5774 {
5775 	if (rdev->pg_flags) {
5776 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5777 			si_init_dma_pg(rdev);
5778 		}
5779 		si_init_ao_cu_mask(rdev);
5780 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5781 			si_init_gfx_cgpg(rdev);
5782 		} else {
5783 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5784 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5785 		}
5786 		si_enable_dma_pg(rdev, true);
5787 		si_enable_gfx_cgpg(rdev, true);
5788 	} else {
5789 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5790 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5791 	}
5792 }
5793 
5794 static void si_fini_pg(struct radeon_device *rdev)
5795 {
5796 	if (rdev->pg_flags) {
5797 		si_enable_dma_pg(rdev, false);
5798 		si_enable_gfx_cgpg(rdev, false);
5799 	}
5800 }
5801 
5802 /*
5803  * RLC
5804  */
5805 void si_rlc_reset(struct radeon_device *rdev)
5806 {
5807 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5808 
5809 	tmp |= SOFT_RESET_RLC;
5810 	WREG32(GRBM_SOFT_RESET, tmp);
5811 	udelay(50);
5812 	tmp &= ~SOFT_RESET_RLC;
5813 	WREG32(GRBM_SOFT_RESET, tmp);
5814 	udelay(50);
5815 }
5816 
5817 static void si_rlc_stop(struct radeon_device *rdev)
5818 {
5819 	WREG32(RLC_CNTL, 0);
5820 
5821 	si_enable_gui_idle_interrupt(rdev, false);
5822 
5823 	si_wait_for_rlc_serdes(rdev);
5824 }
5825 
5826 static void si_rlc_start(struct radeon_device *rdev)
5827 {
5828 	WREG32(RLC_CNTL, RLC_ENABLE);
5829 
5830 	si_enable_gui_idle_interrupt(rdev, true);
5831 
5832 	udelay(50);
5833 }
5834 
5835 static bool si_lbpw_supported(struct radeon_device *rdev)
5836 {
5837 	u32 tmp;
5838 
5839 	/* Enable LBPW only for DDR3; MC_SEQ_MISC0[31:28] encodes the memory type */
5840 	tmp = RREG32(MC_SEQ_MISC0);
5841 	if ((tmp & 0xF0000000) == 0xB0000000)
5842 		return true;
5843 	return false;
5844 }
5845 
5846 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5847 {
5848 	u32 tmp;
5849 
5850 	tmp = RREG32(RLC_LB_CNTL);
5851 	if (enable)
5852 		tmp |= LOAD_BALANCE_ENABLE;
5853 	else
5854 		tmp &= ~LOAD_BALANCE_ENABLE;
5855 	WREG32(RLC_LB_CNTL, tmp);
5856 
5857 	if (!enable) {
5858 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5859 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5860 	}
5861 }
5862 
5863 static int si_rlc_resume(struct radeon_device *rdev)
5864 {
5865 	u32 i;
5866 
5867 	if (!rdev->rlc_fw)
5868 		return -EINVAL;
5869 
5870 	si_rlc_stop(rdev);
5871 
5872 	si_rlc_reset(rdev);
5873 
5874 	si_init_pg(rdev);
5875 
5876 	si_init_cg(rdev);
5877 
5878 	WREG32(RLC_RL_BASE, 0);
5879 	WREG32(RLC_RL_SIZE, 0);
5880 	WREG32(RLC_LB_CNTL, 0);
5881 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5882 	WREG32(RLC_LB_CNTR_INIT, 0);
5883 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5884 
5885 	WREG32(RLC_MC_CNTL, 0);
5886 	WREG32(RLC_UCODE_CNTL, 0);
5887 
5888 	if (rdev->new_fw) {
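	/* load the RLC microcode one dword at a time via the ADDR/DATA pair */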
5889 		const struct rlc_firmware_header_v1_0 *hdr =
5890 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5891 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5892 		const __le32 *fw_data = (const __le32 *)
5893 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5894 
5895 		radeon_ucode_print_rlc_hdr(&hdr->header);
5896 
5897 		for (i = 0; i < fw_size; i++) {
5898 			WREG32(RLC_UCODE_ADDR, i);
5899 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5900 		}
5901 	} else {
5902 		const __be32 *fw_data =
5903 			(const __be32 *)rdev->rlc_fw->data;
5904 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5905 			WREG32(RLC_UCODE_ADDR, i);
5906 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5907 		}
5908 	}
5909 	WREG32(RLC_UCODE_ADDR, 0);
5910 
5911 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5912 
5913 	si_rlc_start(rdev);
5914 
5915 	return 0;
5916 }
5917 
5918 static void si_enable_interrupts(struct radeon_device *rdev)
5919 {
5920 	u32 ih_cntl = RREG32(IH_CNTL);
5921 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5922 
5923 	ih_cntl |= ENABLE_INTR;
5924 	ih_rb_cntl |= IH_RB_ENABLE;
5925 	WREG32(IH_CNTL, ih_cntl);
5926 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5927 	rdev->ih.enabled = true;
5928 }
5929 
5930 static void si_disable_interrupts(struct radeon_device *rdev)
5931 {
5932 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5933 	u32 ih_cntl = RREG32(IH_CNTL);
5934 
5935 	ih_rb_cntl &= ~IH_RB_ENABLE;
5936 	ih_cntl &= ~ENABLE_INTR;
5937 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5938 	WREG32(IH_CNTL, ih_cntl);
5939 	/* set rptr, wptr to 0 */
5940 	WREG32(IH_RB_RPTR, 0);
5941 	WREG32(IH_RB_WPTR, 0);
5942 	rdev->ih.enabled = false;
5943 	rdev->ih.rptr = 0;
5944 }
5945 
5946 static void si_disable_interrupt_state(struct radeon_device *rdev)
5947 {
5948 	u32 tmp;
5949 
5950 	tmp = RREG32(CP_INT_CNTL_RING0) &
5951 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5952 	WREG32(CP_INT_CNTL_RING0, tmp);
5953 	WREG32(CP_INT_CNTL_RING1, 0);
5954 	WREG32(CP_INT_CNTL_RING2, 0);
5955 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5956 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5957 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5958 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5959 	WREG32(GRBM_INT_CNTL, 0);
5960 	WREG32(SRBM_INT_CNTL, 0);
5961 	if (rdev->num_crtc >= 2) {
5962 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5963 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5964 	}
5965 	if (rdev->num_crtc >= 4) {
5966 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5967 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5968 	}
5969 	if (rdev->num_crtc >= 6) {
5970 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5971 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5972 	}
5973 
5974 	if (rdev->num_crtc >= 2) {
5975 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5976 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5977 	}
5978 	if (rdev->num_crtc >= 4) {
5979 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5980 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5981 	}
5982 	if (rdev->num_crtc >= 6) {
5983 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5984 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5985 	}
5986 
5987 	if (!ASIC_IS_NODCE(rdev)) {
5988 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5989 
5990 		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5991 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5992 		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5993 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5994 		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5995 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5996 		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5997 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5998 		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5999 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6000 		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6001 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6002 	}
6003 }
6004 
6005 static int si_irq_init(struct radeon_device *rdev)
6006 {
6007 	int ret = 0;
6008 	int rb_bufsz;
6009 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6010 
6011 	/* allocate ring */
6012 	ret = r600_ih_ring_alloc(rdev);
6013 	if (ret)
6014 		return ret;
6015 
6016 	/* disable irqs */
6017 	si_disable_interrupts(rdev);
6018 
6019 	/* init rlc */
6020 	ret = si_rlc_resume(rdev);
6021 	if (ret) {
6022 		r600_ih_ring_fini(rdev);
6023 		return ret;
6024 	}
6025 
6026 	/* setup interrupt control */
6027 	/* set dummy read address to ring address */
6028 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6029 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6030 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6031 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6032 	 */
6033 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6034 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6035 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6036 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6037 
6038 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
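	/* IH ring buffer size is programmed as log2 of its size in dwords */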
6039 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6040 
6041 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6042 		      IH_WPTR_OVERFLOW_CLEAR |
6043 		      (rb_bufsz << 1));
6044 
6045 	if (rdev->wb.enabled)
6046 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6047 
6048 	/* set the writeback address whether it's enabled or not */
6049 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6050 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6051 
6052 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6053 
6054 	/* set rptr, wptr to 0 */
6055 	WREG32(IH_RB_RPTR, 0);
6056 	WREG32(IH_RB_WPTR, 0);
6057 
6058 	/* Default settings for IH_CNTL (disabled at first) */
6059 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6060 	/* RPTR_REARM only works if msi's are enabled */
6061 	if (rdev->msi_enabled)
6062 		ih_cntl |= RPTR_REARM;
6063 	WREG32(IH_CNTL, ih_cntl);
6064 
6065 	/* force the active interrupt state to all disabled */
6066 	si_disable_interrupt_state(rdev);
6067 
6068 	pci_set_master(rdev->pdev);
6069 
6070 	/* enable irqs */
6071 	si_enable_interrupts(rdev);
6072 
6073 	return ret;
6074 }
6075 
6076 int si_irq_set(struct radeon_device *rdev)
6077 {
6078 	u32 cp_int_cntl;
6079 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6080 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6081 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
6082 	u32 grbm_int_cntl = 0;
6083 	u32 dma_cntl, dma_cntl1;
6084 	u32 thermal_int = 0;
6085 
6086 	if (!rdev->irq.installed) {
6087 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6088 		return -EINVAL;
6089 	}
6090 	/* don't enable anything if the ih is disabled */
6091 	if (!rdev->ih.enabled) {
6092 		si_disable_interrupts(rdev);
6093 		/* force the active interrupt state to all disabled */
6094 		si_disable_interrupt_state(rdev);
6095 		return 0;
6096 	}
6097 
6098 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6099 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6100 
6101 	if (!ASIC_IS_NODCE(rdev)) {
6102 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6103 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6104 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6105 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6106 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6107 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6108 	}
6109 
6110 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6111 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6112 
6113 	thermal_int = RREG32(CG_THERMAL_INT) &
6114 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6115 
6116 	/* enable CP interrupts on all rings */
6117 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6118 		DRM_DEBUG("si_irq_set: sw int gfx\n");
6119 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6120 	}
6121 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6122 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6123 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6124 	}
6125 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6126 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6127 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6128 	}
6129 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6130 		DRM_DEBUG("si_irq_set: sw int dma\n");
6131 		dma_cntl |= TRAP_ENABLE;
6132 	}
6133 
6134 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6135 		DRM_DEBUG("si_irq_set: sw int dma1\n");
6136 		dma_cntl1 |= TRAP_ENABLE;
6137 	}
6138 	if (rdev->irq.crtc_vblank_int[0] ||
6139 	    atomic_read(&rdev->irq.pflip[0])) {
6140 		DRM_DEBUG("si_irq_set: vblank 0\n");
6141 		crtc1 |= VBLANK_INT_MASK;
6142 	}
6143 	if (rdev->irq.crtc_vblank_int[1] ||
6144 	    atomic_read(&rdev->irq.pflip[1])) {
6145 		DRM_DEBUG("si_irq_set: vblank 1\n");
6146 		crtc2 |= VBLANK_INT_MASK;
6147 	}
6148 	if (rdev->irq.crtc_vblank_int[2] ||
6149 	    atomic_read(&rdev->irq.pflip[2])) {
6150 		DRM_DEBUG("si_irq_set: vblank 2\n");
6151 		crtc3 |= VBLANK_INT_MASK;
6152 	}
6153 	if (rdev->irq.crtc_vblank_int[3] ||
6154 	    atomic_read(&rdev->irq.pflip[3])) {
6155 		DRM_DEBUG("si_irq_set: vblank 3\n");
6156 		crtc4 |= VBLANK_INT_MASK;
6157 	}
6158 	if (rdev->irq.crtc_vblank_int[4] ||
6159 	    atomic_read(&rdev->irq.pflip[4])) {
6160 		DRM_DEBUG("si_irq_set: vblank 4\n");
6161 		crtc5 |= VBLANK_INT_MASK;
6162 	}
6163 	if (rdev->irq.crtc_vblank_int[5] ||
6164 	    atomic_read(&rdev->irq.pflip[5])) {
6165 		DRM_DEBUG("si_irq_set: vblank 5\n");
6166 		crtc6 |= VBLANK_INT_MASK;
6167 	}
6168 	if (rdev->irq.hpd[0]) {
6169 		DRM_DEBUG("si_irq_set: hpd 1\n");
6170 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6171 	}
6172 	if (rdev->irq.hpd[1]) {
6173 		DRM_DEBUG("si_irq_set: hpd 2\n");
6174 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6175 	}
6176 	if (rdev->irq.hpd[2]) {
6177 		DRM_DEBUG("si_irq_set: hpd 3\n");
6178 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6179 	}
6180 	if (rdev->irq.hpd[3]) {
6181 		DRM_DEBUG("si_irq_set: hpd 4\n");
6182 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6183 	}
6184 	if (rdev->irq.hpd[4]) {
6185 		DRM_DEBUG("si_irq_set: hpd 5\n");
6186 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6187 	}
6188 	if (rdev->irq.hpd[5]) {
6189 		DRM_DEBUG("si_irq_set: hpd 6\n");
6190 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6191 	}
6192 
6193 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6194 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6195 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6196 
6197 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6198 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6199 
6200 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6201 
6202 	if (rdev->irq.dpm_thermal) {
6203 		DRM_DEBUG("dpm thermal\n");
6204 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6205 	}
6206 
6207 	if (rdev->num_crtc >= 2) {
6208 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6209 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6210 	}
6211 	if (rdev->num_crtc >= 4) {
6212 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6213 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6214 	}
6215 	if (rdev->num_crtc >= 6) {
6216 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6217 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6218 	}
6219 
6220 	if (rdev->num_crtc >= 2) {
6221 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6222 		       GRPH_PFLIP_INT_MASK);
6223 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6224 		       GRPH_PFLIP_INT_MASK);
6225 	}
6226 	if (rdev->num_crtc >= 4) {
6227 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6228 		       GRPH_PFLIP_INT_MASK);
6229 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6230 		       GRPH_PFLIP_INT_MASK);
6231 	}
6232 	if (rdev->num_crtc >= 6) {
6233 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6234 		       GRPH_PFLIP_INT_MASK);
6235 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6236 		       GRPH_PFLIP_INT_MASK);
6237 	}
6238 
6239 	if (!ASIC_IS_NODCE(rdev)) {
6240 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
6241 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
6242 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
6243 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
6244 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
6245 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
6246 	}
6247 
6248 	WREG32(CG_THERMAL_INT, thermal_int);
6249 
6250 	/* posting read */
6251 	RREG32(SRBM_STATUS);
6252 
6253 	return 0;
6254 }
6255 
6256 static inline void si_irq_ack(struct radeon_device *rdev)
6257 {
6258 	u32 tmp;
6259 
6260 	if (ASIC_IS_NODCE(rdev))
6261 		return;
6262 
6263 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6264 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6265 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6266 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6267 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6268 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6269 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6270 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6271 	if (rdev->num_crtc >= 4) {
6272 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6273 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6274 	}
6275 	if (rdev->num_crtc >= 6) {
6276 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6277 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6278 	}
6279 
6280 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6281 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6282 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6283 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6284 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6285 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6286 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6287 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6288 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6289 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6290 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6291 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6292 
6293 	if (rdev->num_crtc >= 4) {
6294 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6295 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6296 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6297 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6298 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6299 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6300 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6301 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6302 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6303 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6304 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6305 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6306 	}
6307 
6308 	if (rdev->num_crtc >= 6) {
6309 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6310 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6311 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6312 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6313 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6314 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6315 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6316 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6317 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6318 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6319 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6320 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6321 	}
6322 
6323 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6324 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6325 		tmp |= DC_HPDx_INT_ACK;
6326 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6327 	}
6328 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6329 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6330 		tmp |= DC_HPDx_INT_ACK;
6331 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6332 	}
6333 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6334 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6335 		tmp |= DC_HPDx_INT_ACK;
6336 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6337 	}
6338 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6339 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6340 		tmp |= DC_HPDx_INT_ACK;
6341 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6342 	}
6343 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6344 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6345 		tmp |= DC_HPDx_INT_ACK;
6346 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6347 	}
6348 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6349 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6350 		tmp |= DC_HPDx_INT_ACK;
6351 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6352 	}
6353 
6354 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
6355 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6356 		tmp |= DC_HPDx_RX_INT_ACK;
6357 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6358 	}
6359 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
6360 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6361 		tmp |= DC_HPDx_RX_INT_ACK;
6362 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6363 	}
6364 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
6365 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6366 		tmp |= DC_HPDx_RX_INT_ACK;
6367 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6368 	}
6369 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
6370 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6371 		tmp |= DC_HPDx_RX_INT_ACK;
6372 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6373 	}
6374 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
6375 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6376 		tmp |= DC_HPDx_RX_INT_ACK;
6377 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6378 	}
6379 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
6380 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6381 		tmp |= DC_HPDx_RX_INT_ACK;
6382 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6383 	}
6384 }
6385 
6386 static void si_irq_disable(struct radeon_device *rdev)
6387 {
6388 	si_disable_interrupts(rdev);
6389 	/* Wait and acknowledge irq */
6390 	mdelay(1);
6391 	si_irq_ack(rdev);
6392 	si_disable_interrupt_state(rdev);
6393 }
6394 
6395 static void si_irq_suspend(struct radeon_device *rdev)
6396 {
6397 	si_irq_disable(rdev);
6398 	si_rlc_stop(rdev);
6399 }
6400 
6401 static void si_irq_fini(struct radeon_device *rdev)
6402 {
6403 	si_irq_suspend(rdev);
6404 	r600_ih_ring_fini(rdev);
6405 }
6406 
6407 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6408 {
6409 	u32 wptr, tmp;
6410 
6411 	if (rdev->wb.enabled)
6412 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6413 	else
6414 		wptr = RREG32(IH_RB_WPTR);
6415 
6416 	if (wptr & RB_OVERFLOW) {
6417 		wptr &= ~RB_OVERFLOW;
6418 		/* When a ring buffer overflow happens, start parsing interrupts
6419 		 * from the last vector that was not overwritten (wptr + 16, i.e.
6420 		 * one 16-byte IV entry past wptr). Hopefully this lets us catch up.
6421 		 */
6422 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6423 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6424 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6425 		tmp = RREG32(IH_RB_CNTL);
6426 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6427 		WREG32(IH_RB_CNTL, tmp);
6428 	}
6429 	return (wptr & rdev->ih.ptr_mask);
6430 }
6431 
6432 /* SI IV Ring
6433  * Each IV ring entry is 128 bits:
6434  * [7:0]    - interrupt source id
6435  * [31:8]   - reserved
6436  * [59:32]  - interrupt source data
6437  * [63:60]  - reserved
6438  * [71:64]  - RINGID
6439  * [79:72]  - VMID
6440  * [127:80] - reserved
6441  */
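
/* Illustrative sketch only (not part of the driver): unpacking one IV
 * ring entry according to the layout above. si_irq_process() below does
 * the same extraction inline on rdev->ih.ring; struct si_iv_entry and
 * si_decode_iv_entry() are made-up names used purely for illustration.
 */
struct si_iv_entry {
	u32 src_id;	/* [7:0]   - interrupt source id */
	u32 src_data;	/* [59:32] - interrupt source data */
	u32 ring_id;	/* [71:64] - RINGID */
	u32 vm_id;	/* [79:72] - VMID */
};

static inline struct si_iv_entry si_decode_iv_entry(const volatile u32 *ring,
						    u32 ring_index)
{
	struct si_iv_entry e;

	e.src_id   = le32_to_cpu(ring[ring_index + 0]) & 0xff;
	e.src_data = le32_to_cpu(ring[ring_index + 1]) & 0xfffffff;
	e.ring_id  = le32_to_cpu(ring[ring_index + 2]) & 0xff;
	e.vm_id    = (le32_to_cpu(ring[ring_index + 2]) >> 8) & 0xff;
	return e;
}
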
6442 irqreturn_t si_irq_process(struct radeon_device *rdev)
6443 {
6444 	u32 wptr;
6445 	u32 rptr;
6446 	u32 src_id, src_data, ring_id;
6447 	u32 ring_index;
6448 	bool queue_hotplug = false;
6449 	bool queue_dp = false;
6450 	bool queue_thermal = false;
6451 	u32 status, addr;
6452 
6453 	if (!rdev->ih.enabled || rdev->shutdown)
6454 		return IRQ_NONE;
6455 
6456 	wptr = si_get_ih_wptr(rdev);
6457 
6458 restart_ih:
6459 	/* is somebody else already processing irqs? */
6460 	if (atomic_xchg(&rdev->ih.lock, 1))
6461 		return IRQ_NONE;
6462 
6463 	rptr = rdev->ih.rptr;
6464 	DRM_DEBUG_VBLANK("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6465 
6466 	/* Order reading of wptr vs. reading of IH ring data */
6467 	rmb();
6468 
6469 	/* display interrupts */
6470 	si_irq_ack(rdev);
6471 
6472 	while (rptr != wptr) {
6473 		/* wptr/rptr are in bytes! */
6474 		/* wptr/rptr are in bytes; each IV entry is 16 bytes (4 dwords) */
6475 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6476 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6477 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6478 
6479 		switch (src_id) {
6480 		case 1: /* D1 vblank/vline */
6481 			switch (src_data) {
6482 			case 0: /* D1 vblank */
6483 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
6484 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6485 
6486 				if (rdev->irq.crtc_vblank_int[0]) {
6487 					drm_handle_vblank(rdev->ddev, 0);
6488 					rdev->pm.vblank_sync = true;
6489 					wake_up(&rdev->irq.vblank_queue);
6490 				}
6491 				if (atomic_read(&rdev->irq.pflip[0]))
6492 					radeon_crtc_handle_vblank(rdev, 0);
6493 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6494 				DRM_DEBUG_VBLANK("IH: D1 vblank\n");
6495 
6496 				break;
6497 			case 1: /* D1 vline */
6498 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
6499 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6500 
6501 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6502 				DRM_DEBUG_VBLANK("IH: D1 vline\n");
6503 
6504 				break;
6505 			default:
6506 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6507 				break;
6508 			}
6509 			break;
6510 		case 2: /* D2 vblank/vline */
6511 			switch (src_data) {
6512 			case 0: /* D2 vblank */
6513 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
6514 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6515 
6516 				if (rdev->irq.crtc_vblank_int[1]) {
6517 					drm_handle_vblank(rdev->ddev, 1);
6518 					rdev->pm.vblank_sync = true;
6519 					wake_up(&rdev->irq.vblank_queue);
6520 				}
6521 				if (atomic_read(&rdev->irq.pflip[1]))
6522 					radeon_crtc_handle_vblank(rdev, 1);
6523 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6524 				DRM_DEBUG_VBLANK("IH: D2 vblank\n");
6525 
6526 				break;
6527 			case 1: /* D2 vline */
6528 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
6529 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6530 
6531 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6532 				DRM_DEBUG_VBLANK("IH: D2 vline\n");
6533 
6534 				break;
6535 			default:
6536 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6537 				break;
6538 			}
6539 			break;
6540 		case 3: /* D3 vblank/vline */
6541 			switch (src_data) {
6542 			case 0: /* D3 vblank */
6543 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
6544 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6545 
6546 				if (rdev->irq.crtc_vblank_int[2]) {
6547 					drm_handle_vblank(rdev->ddev, 2);
6548 					rdev->pm.vblank_sync = true;
6549 					wake_up(&rdev->irq.vblank_queue);
6550 				}
6551 				if (atomic_read(&rdev->irq.pflip[2]))
6552 					radeon_crtc_handle_vblank(rdev, 2);
6553 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6554 				DRM_DEBUG_VBLANK("IH: D3 vblank\n");
6555 
6556 				break;
6557 			case 1: /* D3 vline */
6558 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
6559 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6560 
6561 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6562 				DRM_DEBUG_VBLANK("IH: D3 vline\n");
6563 
6564 				break;
6565 			default:
6566 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6567 				break;
6568 			}
6569 			break;
6570 		case 4: /* D4 vblank/vline */
6571 			switch (src_data) {
6572 			case 0: /* D4 vblank */
6573 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
6574 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6575 
6576 				if (rdev->irq.crtc_vblank_int[3]) {
6577 					drm_handle_vblank(rdev->ddev, 3);
6578 					rdev->pm.vblank_sync = true;
6579 					wake_up(&rdev->irq.vblank_queue);
6580 				}
6581 				if (atomic_read(&rdev->irq.pflip[3]))
6582 					radeon_crtc_handle_vblank(rdev, 3);
6583 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6584 				DRM_DEBUG_VBLANK("IH: D4 vblank\n");
6585 
6586 				break;
6587 			case 1: /* D4 vline */
6588 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
6589 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6590 
6591 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6592 				DRM_DEBUG_VBLANK("IH: D4 vline\n");
6593 
6594 				break;
6595 			default:
6596 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6597 				break;
6598 			}
6599 			break;
6600 		case 5: /* D5 vblank/vline */
6601 			switch (src_data) {
6602 			case 0: /* D5 vblank */
6603 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
6604 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6605 
6606 				if (rdev->irq.crtc_vblank_int[4]) {
6607 					drm_handle_vblank(rdev->ddev, 4);
6608 					rdev->pm.vblank_sync = true;
6609 					wake_up(&rdev->irq.vblank_queue);
6610 				}
6611 				if (atomic_read(&rdev->irq.pflip[4]))
6612 					radeon_crtc_handle_vblank(rdev, 4);
6613 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6614 				DRM_DEBUG_VBLANK("IH: D5 vblank\n");
6615 
6616 				break;
6617 			case 1: /* D5 vline */
6618 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
6619 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6620 
6621 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6622 				DRM_DEBUG_VBLANK("IH: D5 vline\n");
6623 
6624 				break;
6625 			default:
6626 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6627 				break;
6628 			}
6629 			break;
6630 		case 6: /* D6 vblank/vline */
6631 			switch (src_data) {
6632 			case 0: /* D6 vblank */
6633 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
6634 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6635 
6636 				if (rdev->irq.crtc_vblank_int[5]) {
6637 					drm_handle_vblank(rdev->ddev, 5);
6638 					rdev->pm.vblank_sync = true;
6639 					wake_up(&rdev->irq.vblank_queue);
6640 				}
6641 				if (atomic_read(&rdev->irq.pflip[5]))
6642 					radeon_crtc_handle_vblank(rdev, 5);
6643 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6644 				DRM_DEBUG_VBLANK("IH: D6 vblank\n");
6645 
6646 				break;
6647 			case 1: /* D6 vline */
6648 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
6649 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6650 
6651 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6652 				DRM_DEBUG_VBLANK("IH: D6 vline\n");
6653 
6654 				break;
6655 			default:
6656 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6657 				break;
6658 			}
6659 			break;
6660 		case 8: /* D1 page flip */
6661 		case 10: /* D2 page flip */
6662 		case 12: /* D3 page flip */
6663 		case 14: /* D4 page flip */
6664 		case 16: /* D5 page flip */
6665 		case 18: /* D6 page flip */
6666 			DRM_DEBUG_VBLANK("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6667 			if (radeon_use_pflipirq > 0)
6668 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6669 			break;
6670 		case 42: /* HPD hotplug */
6671 			switch (src_data) {
6672 			case 0:
6673 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
6674 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6675 
6676 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6677 				queue_hotplug = true;
6678 				DRM_DEBUG("IH: HPD1\n");
6679 
6680 				break;
6681 			case 1:
6682 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
6683 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6684 
6685 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6686 				queue_hotplug = true;
6687 				DRM_DEBUG("IH: HPD2\n");
6688 
6689 				break;
6690 			case 2:
6691 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
6692 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6693 
6694 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6695 				queue_hotplug = true;
6696 				DRM_DEBUG("IH: HPD3\n");
6697 
6698 				break;
6699 			case 3:
6700 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
6701 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6702 
6703 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6704 				queue_hotplug = true;
6705 				DRM_DEBUG("IH: HPD4\n");
6706 
6707 				break;
6708 			case 4:
6709 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
6710 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6711 
6712 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6713 				queue_hotplug = true;
6714 				DRM_DEBUG("IH: HPD5\n");
6715 
6716 				break;
6717 			case 5:
6718 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
6719 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6720 
6721 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6722 				queue_hotplug = true;
6723 				DRM_DEBUG("IH: HPD6\n");
6724 
6725 				break;
6726 			case 6:
6727 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
6728 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6729 
6730 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
6731 				queue_dp = true;
6732 				DRM_DEBUG("IH: HPD_RX 1\n");
6733 
6734 				break;
6735 			case 7:
6736 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
6737 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6738 
6739 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
6740 				queue_dp = true;
6741 				DRM_DEBUG("IH: HPD_RX 2\n");
6742 
6743 				break;
6744 			case 8:
6745 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
6746 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6747 
6748 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
6749 				queue_dp = true;
6750 				DRM_DEBUG("IH: HPD_RX 3\n");
6751 
6752 				break;
6753 			case 9:
6754 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
6755 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6756 
6757 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
6758 				queue_dp = true;
6759 				DRM_DEBUG("IH: HPD_RX 4\n");
6760 
6761 				break;
6762 			case 10:
6763 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
6764 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6765 
6766 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
6767 				queue_dp = true;
6768 				DRM_DEBUG("IH: HPD_RX 5\n");
6769 
6770 				break;
6771 			case 11:
6772 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
6773 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6774 
6775 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
6776 				queue_dp = true;
6777 				DRM_DEBUG("IH: HPD_RX 6\n");
6778 
6779 				break;
6780 			default:
6781 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6782 				break;
6783 			}
6784 			break;
6785 		case 96:
6786 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6787 			WREG32(SRBM_INT_ACK, 0x1);
6788 			break;
6789 		case 124: /* UVD */
6790 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6791 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6792 			break;
6793 		case 146:
6794 		case 147:
6795 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6796 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6797 			/* reset addr and status */
6798 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6799 			if (addr == 0x0 && status == 0x0)
6800 				break;
6801 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6802 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6803 				addr);
6804 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6805 				status);
6806 			si_vm_decode_fault(rdev, status, addr);
6807 			break;
6808 		case 176: /* RINGID0 CP_INT */
6809 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6810 			break;
6811 		case 177: /* RINGID1 CP_INT */
6812 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6813 			break;
6814 		case 178: /* RINGID2 CP_INT */
6815 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6816 			break;
6817 		case 181: /* CP EOP event */
6818 			DRM_DEBUG("IH: CP EOP\n");
6819 			switch (ring_id) {
6820 			case 0:
6821 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6822 				break;
6823 			case 1:
6824 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6825 				break;
6826 			case 2:
6827 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6828 				break;
6829 			}
6830 			break;
6831 		case 224: /* DMA trap event */
6832 			DRM_DEBUG("IH: DMA trap\n");
6833 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6834 			break;
6835 		case 230: /* thermal low to high */
6836 			DRM_DEBUG("IH: thermal low to high\n");
6837 			rdev->pm.dpm.thermal.high_to_low = false;
6838 			queue_thermal = true;
6839 			break;
6840 		case 231: /* thermal high to low */
6841 			DRM_DEBUG("IH: thermal high to low\n");
6842 			rdev->pm.dpm.thermal.high_to_low = true;
6843 			queue_thermal = true;
6844 			break;
6845 		case 233: /* GUI IDLE */
6846 			DRM_DEBUG("IH: GUI idle\n");
6847 			break;
6848 		case 244: /* DMA1 trap event */
6849 			DRM_DEBUG("IH: DMA1 trap\n");
6850 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6851 			break;
6852 		default:
6853 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6854 			break;
6855 		}
6856 
6857 		/* wptr/rptr are in bytes! */
6858 		rptr += 16;
6859 		rptr &= rdev->ih.ptr_mask;
6860 		WREG32(IH_RB_RPTR, rptr);
6861 	}
6862 	if (queue_dp)
6863 		schedule_work(&rdev->dp_work);
6864 	if (queue_hotplug)
6865 		schedule_delayed_work(&rdev->hotplug_work, 0);
6866 	if (queue_thermal && rdev->pm.dpm_enabled)
6867 		schedule_work(&rdev->pm.dpm.thermal.work);
6868 	rdev->ih.rptr = rptr;
6869 	atomic_set(&rdev->ih.lock, 0);
6870 
6871 	/* make sure wptr hasn't changed while processing */
6872 	wptr = si_get_ih_wptr(rdev);
6873 	if (wptr != rptr)
6874 		goto restart_ih;
6875 
6876 	return IRQ_HANDLED;
6877 }
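
/*
 * The read-pointer arithmetic above relies on the IH ring size being a
 * power of two, so the wrap reduces to a bitwise AND with ptr_mask. A
 * minimal sketch of that arithmetic in isolation (the helper name is
 * hypothetical, not part of the driver):
 */
static inline u32 ih_advance_rptr(u32 rptr, u32 ptr_mask)
{
	/* each IH ring entry is 16 bytes and rptr/wptr are byte offsets */
	return (rptr + 16) & ptr_mask;
}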
6878 
6879 /*
6880  * startup/shutdown callbacks
6881  */
6882 static void si_uvd_init(struct radeon_device *rdev)
6883 {
6884 	int r;
6885 
6886 	if (!rdev->has_uvd)
6887 		return;
6888 
6889 	r = radeon_uvd_init(rdev);
6890 	if (r) {
6891 		dev_err(rdev->dev, "failed UVD init (%d).\n", r);
6892 		/*
6893 		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
6894 		 * uvd_v2_2_resume() fail early, so nothing would happen
6895 		 * there anyway; going through that code path is pointless,
6896 		 * hence we disable UVD here.
6897 		 */
6898 		rdev->has_uvd = 0;
6899 		return;
6900 	}
6901 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6902 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6903 }
6904 
6905 static void si_uvd_start(struct radeon_device *rdev)
6906 {
6907 	int r;
6908 
6909 	if (!rdev->has_uvd)
6910 		return;
6911 
6912 	r = uvd_v2_2_resume(rdev);
6913 	if (r) {
6914 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6915 		goto error;
6916 	}
6917 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6918 	if (r) {
6919 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6920 		goto error;
6921 	}
6922 	return;
6923 
6924 error:
6925 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6926 }
6927 
6928 static void si_uvd_resume(struct radeon_device *rdev)
6929 {
6930 	struct radeon_ring *ring;
6931 	int r;
6932 
6933 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6934 		return;
6935 
6936 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6937 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6938 	if (r) {
6939 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6940 		return;
6941 	}
6942 	r = uvd_v1_0_init(rdev);
6943 	if (r) {
6944 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6945 		return;
6946 	}
6947 }
6948 
6949 static void si_vce_init(struct radeon_device *rdev)
6950 {
6951 	int r;
6952 
6953 	if (!rdev->has_vce)
6954 		return;
6955 
6956 	r = radeon_vce_init(rdev);
6957 	if (r) {
6958 		dev_err(rdev->dev, "failed VCE init (%d).\n", r);
6959 		/*
6960 		 * At this point rdev->vce.vcpu_bo is NULL, which makes
6961 		 * si_vce_start() fail early, so nothing would happen
6962 		 * there anyway; going through that code path is pointless,
6963 		 * hence we disable VCE here.
6964 		 */
6965 		rdev->has_vce = 0;
6966 		return;
6967 	}
6968 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6969 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6970 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6971 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6972 }
6973 
6974 static void si_vce_start(struct radeon_device *rdev)
6975 {
6976 	int r;
6977 
6978 	if (!rdev->has_vce)
6979 		return;
6980 
6981 	r = radeon_vce_resume(rdev);
6982 	if (r) {
6983 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6984 		goto error;
6985 	}
6986 	r = vce_v1_0_resume(rdev);
6987 	if (r) {
6988 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6989 		goto error;
6990 	}
6991 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6992 	if (r) {
6993 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6994 		goto error;
6995 	}
6996 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6997 	if (r) {
6998 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6999 		goto error;
7000 	}
7001 	return;
7002 
7003 error:
7004 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
7005 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
7006 }
7007 
7008 static void si_vce_resume(struct radeon_device *rdev)
7009 {
7010 	struct radeon_ring *ring;
7011 	int r;
7012 
7013 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
7014 		return;
7015 
7016 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
7017 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7018 	if (r) {
7019 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
7020 		return;
7021 	}
7022 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
7023 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7024 	if (r) {
7025 		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
7026 		return;
7027 	}
7028 	r = vce_v1_0_init(rdev);
7029 	if (r) {
7030 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
7031 		return;
7032 	}
7033 }
7034 
7035 static int si_startup(struct radeon_device *rdev)
7036 {
7037 	struct radeon_ring *ring;
7038 	int r;
7039 
7040 	/* enable pcie gen2/3 link */
7041 	si_pcie_gen3_enable(rdev);
7042 	/* enable aspm */
7043 	si_program_aspm(rdev);
7044 
7045 	/* scratch needs to be initialized before MC */
7046 	r = r600_vram_scratch_init(rdev);
7047 	if (r)
7048 		return r;
7049 
7050 	si_mc_program(rdev);
7051 
7052 	if (!rdev->pm.dpm_enabled) {
7053 		r = si_mc_load_microcode(rdev);
7054 		if (r) {
7055 			DRM_ERROR("Failed to load MC firmware!\n");
7056 			return r;
7057 		}
7058 	}
7059 
7060 	r = si_pcie_gart_enable(rdev);
7061 	if (r)
7062 		return r;
7063 	si_gpu_init(rdev);
7064 
7065 	/* allocate rlc buffers */
7066 	if (rdev->family == CHIP_VERDE) {
7067 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
7068 		rdev->rlc.reg_list_size =
7069 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
7070 	}
7071 	rdev->rlc.cs_data = si_cs_data;
7072 	r = sumo_rlc_init(rdev);
7073 	if (r) {
7074 		DRM_ERROR("Failed to init rlc BOs!\n");
7075 		return r;
7076 	}
7077 
7078 	/* allocate wb buffer */
7079 	r = radeon_wb_init(rdev);
7080 	if (r)
7081 		return r;
7082 
7083 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7084 	if (r) {
7085 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7086 		return r;
7087 	}
7088 
7089 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7090 	if (r) {
7091 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7092 		return r;
7093 	}
7094 
7095 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7096 	if (r) {
7097 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7098 		return r;
7099 	}
7100 
7101 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7102 	if (r) {
7103 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7104 		return r;
7105 	}
7106 
7107 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7108 	if (r) {
7109 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7110 		return r;
7111 	}
7112 
7113 	si_uvd_start(rdev);
7114 	si_vce_start(rdev);
7115 
7116 	/* Enable IRQ */
7117 	if (!rdev->irq.installed) {
7118 		r = radeon_irq_kms_init(rdev);
7119 		if (r)
7120 			return r;
7121 	}
7122 
7123 	r = si_irq_init(rdev);
7124 	if (r) {
7125 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7126 		radeon_irq_kms_fini(rdev);
7127 		return r;
7128 	}
7129 	si_irq_set(rdev);
7130 
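	/*
	 * The last argument to radeon_ring_init() below is the filler (nop)
	 * packet used to pad the ring: a type-2 CP packet for the CP rings
	 * and a DMA NOP packet for the DMA rings.
	 */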
7131 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7132 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7133 			     RADEON_CP_PACKET2);
7134 	if (r)
7135 		return r;
7136 
7137 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7138 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7139 			     RADEON_CP_PACKET2);
7140 	if (r)
7141 		return r;
7142 
7143 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7144 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7145 			     RADEON_CP_PACKET2);
7146 	if (r)
7147 		return r;
7148 
7149 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7150 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7151 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7152 	if (r)
7153 		return r;
7154 
7155 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7156 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7157 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7158 	if (r)
7159 		return r;
7160 
7161 	r = si_cp_load_microcode(rdev);
7162 	if (r)
7163 		return r;
7164 	r = si_cp_resume(rdev);
7165 	if (r)
7166 		return r;
7167 
7168 	r = cayman_dma_resume(rdev);
7169 	if (r)
7170 		return r;
7171 
7172 	si_uvd_resume(rdev);
7173 	si_vce_resume(rdev);
7174 
7175 	r = radeon_ib_pool_init(rdev);
7176 	if (r) {
7177 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7178 		return r;
7179 	}
7180 
7181 	r = radeon_vm_manager_init(rdev);
7182 	if (r) {
7183 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7184 		return r;
7185 	}
7186 
7187 	r = radeon_audio_init(rdev);
7188 	if (r)
7189 		return r;
7190 
7191 	return 0;
7192 }
7193 
7194 int si_resume(struct radeon_device *rdev)
7195 {
7196 	int r;
7197 
7198 	/* Do not reset the GPU before posting; on rv770 and newer hardware,
7199 	 * unlike r500, posting performs the tasks needed to bring the GPU
7200 	 * back into good shape.
7201 	 */
7202 	/* post card */
7203 	atom_asic_init(rdev->mode_info.atom_context);
7204 
7205 	/* init golden registers */
7206 	si_init_golden_registers(rdev);
7207 
7208 	if (rdev->pm.pm_method == PM_METHOD_DPM)
7209 		radeon_pm_resume(rdev);
7210 
7211 	rdev->accel_working = true;
7212 	r = si_startup(rdev);
7213 	if (r) {
7214 		DRM_ERROR("si startup failed on resume\n");
7215 		rdev->accel_working = false;
7216 		return r;
7217 	}
7218 
7219 	return r;
7221 }
7222 
7223 int si_suspend(struct radeon_device *rdev)
7224 {
7225 	radeon_pm_suspend(rdev);
7226 	radeon_audio_fini(rdev);
7227 	radeon_vm_manager_fini(rdev);
7228 	si_cp_enable(rdev, false);
7229 	cayman_dma_stop(rdev);
7230 	if (rdev->has_uvd) {
7231 		uvd_v1_0_fini(rdev);
7232 		radeon_uvd_suspend(rdev);
7233 	}
7234 	if (rdev->has_vce)
7235 		radeon_vce_suspend(rdev);
7236 	si_fini_pg(rdev);
7237 	si_fini_cg(rdev);
7238 	si_irq_suspend(rdev);
7239 	radeon_wb_disable(rdev);
7240 	si_pcie_gart_disable(rdev);
7241 	return 0;
7242 }
7243 
7244 /* The plan is to move initialization into this function and to use
7245  * helper functions so that radeon_device_init does little more
7246  * than call ASIC-specific functions. This should also allow the
7247  * removal of a number of callbacks, such as vram_info.
7249  */
7250 int si_init(struct radeon_device *rdev)
7251 {
7252 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7253 	int r;
7254 
7255 	/* Read BIOS */
7256 	if (!radeon_get_bios(rdev)) {
7257 		if (ASIC_IS_AVIVO(rdev))
7258 			return -EINVAL;
7259 	}
7260 	/* Must be an ATOMBIOS */
7261 	if (!rdev->is_atom_bios) {
7262 		dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
7263 		return -EINVAL;
7264 	}
7265 	r = radeon_atombios_init(rdev);
7266 	if (r)
7267 		return r;
7268 
7269 	/* Post card if necessary */
7270 	if (!radeon_card_posted(rdev)) {
7271 		if (!rdev->bios) {
7272 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7273 			return -EINVAL;
7274 		}
7275 		DRM_INFO("GPU not posted. posting now...\n");
7276 		atom_asic_init(rdev->mode_info.atom_context);
7277 	}
7278 	/* init golden registers */
7279 	si_init_golden_registers(rdev);
7280 	/* Initialize scratch registers */
7281 	si_scratch_init(rdev);
7282 	/* Initialize surface registers */
7283 	radeon_surface_init(rdev);
7284 	/* Initialize clocks */
7285 	radeon_get_clock_info(rdev->ddev);
7286 
7287 	/* Fence driver */
7288 	r = radeon_fence_driver_init(rdev);
7289 	if (r)
7290 		return r;
7291 
7292 	/* initialize memory controller */
7293 	r = si_mc_init(rdev);
7294 	if (r)
7295 		return r;
7296 	/* Memory manager */
7297 	r = radeon_bo_init(rdev);
7298 	if (r)
7299 		return r;
7300 
7301 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7302 	    !rdev->rlc_fw || !rdev->mc_fw) {
7303 		r = si_init_microcode(rdev);
7304 		if (r) {
7305 			DRM_ERROR("Failed to load firmware!\n");
7306 			return r;
7307 		}
7308 	}
7309 
7310 	/* Initialize power management */
7311 	radeon_pm_init(rdev);
7312 
7313 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7314 	ring->ring_obj = NULL;
7315 	r600_ring_init(rdev, ring, 1024 * 1024);
7316 
7317 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7318 	ring->ring_obj = NULL;
7319 	r600_ring_init(rdev, ring, 1024 * 1024);
7320 
7321 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7322 	ring->ring_obj = NULL;
7323 	r600_ring_init(rdev, ring, 1024 * 1024);
7324 
7325 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7326 	ring->ring_obj = NULL;
7327 	r600_ring_init(rdev, ring, 64 * 1024);
7328 
7329 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7330 	ring->ring_obj = NULL;
7331 	r600_ring_init(rdev, ring, 64 * 1024);
7332 
7333 	si_uvd_init(rdev);
7334 	si_vce_init(rdev);
7335 
7336 	rdev->ih.ring_obj = NULL;
7337 	r600_ih_ring_init(rdev, 64 * 1024);
7338 
7339 	r = r600_pcie_gart_init(rdev);
7340 	if (r)
7341 		return r;
7342 
7343 #ifdef __DragonFly__
7344 	/*
7345 	 * Some GLX operations (xfce 4.14) hang on SI hardware, so tell
7346 	 * userland that acceleration is not working properly.
7347 	 */
7348 	rdev->accel_working = false;
7349 	DRM_ERROR("GPU acceleration disabled for now on DragonFly\n");
7350 #else
7351 	rdev->accel_working = true;
7352 #endif
7353 	r = si_startup(rdev);
7354 	if (r) {
7355 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7356 		si_cp_fini(rdev);
7357 		cayman_dma_fini(rdev);
7358 		si_irq_fini(rdev);
7359 		sumo_rlc_fini(rdev);
7360 		radeon_wb_fini(rdev);
7361 		radeon_ib_pool_fini(rdev);
7362 		radeon_vm_manager_fini(rdev);
7363 		radeon_irq_kms_fini(rdev);
7364 		si_pcie_gart_fini(rdev);
7365 		rdev->accel_working = false;
7366 	}
7367 
7368 	/* Don't start up if the MC ucode is missing.
7369 	 * The default clocks and voltages before the MC ucode
7370 	 * is loaded are not sufficient for advanced operations.
7371 	 */
7372 	if (!rdev->mc_fw) {
7373 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7374 		return -EINVAL;
7375 	}
7376 
7377 	return 0;
7378 }
7379 
7380 void si_fini(struct radeon_device *rdev)
7381 {
7382 	radeon_pm_fini(rdev);
7383 	si_cp_fini(rdev);
7384 	cayman_dma_fini(rdev);
7385 	si_fini_pg(rdev);
7386 	si_fini_cg(rdev);
7387 	si_irq_fini(rdev);
7388 	sumo_rlc_fini(rdev);
7389 	radeon_wb_fini(rdev);
7390 	radeon_vm_manager_fini(rdev);
7391 	radeon_ib_pool_fini(rdev);
7392 	radeon_irq_kms_fini(rdev);
7393 	if (rdev->has_uvd) {
7394 		uvd_v1_0_fini(rdev);
7395 		radeon_uvd_fini(rdev);
7396 	}
7397 	if (rdev->has_vce)
7398 		radeon_vce_fini(rdev);
7399 	si_pcie_gart_fini(rdev);
7400 	r600_vram_scratch_fini(rdev);
7401 	radeon_gem_fini(rdev);
7402 	radeon_fence_driver_fini(rdev);
7403 	radeon_bo_fini(rdev);
7404 	radeon_atombios_fini(rdev);
7405 	si_fini_microcode(rdev);
7406 	kfree(rdev->bios);
7407 	rdev->bios = NULL;
7408 }
7409 
7410 /**
7411  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7412  *
7413  * @rdev: radeon_device pointer
7414  *
7415  * Fetches a GPU clock counter snapshot (SI).
7416  * Returns the 64 bit clock counter snapshot.
7417  */
7418 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7419 {
7420 	uint64_t clock;
7421 
7422 	mutex_lock(&rdev->gpu_clock_mutex);
7423 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7424 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7425 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7426 	mutex_unlock(&rdev->gpu_clock_mutex);
7427 	return clock;
7428 }
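
/*
 * The capture write above latches the free-running counter so that the two
 * 32-bit reads are coherent, and the mutex serializes callers against the
 * shared capture register. A minimal sketch of the 64-bit composition step
 * (helper name hypothetical):
 */
static inline uint64_t si_compose_clock64(uint32_t lsb, uint32_t msb)
{
	/* widen before shifting so the MSB half is not truncated */
	return (uint64_t)lsb | ((uint64_t)msb << 32);
}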
7429 
7430 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7431 {
7432 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7433 	int r;
7434 
7435 	/* bypass vclk and dclk with bclk */
7436 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7437 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7438 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7439 
7440 	/* put PLL in bypass mode */
7441 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7442 
7443 	if (!vclk || !dclk) {
7444 		/* keep the Bypass mode */
7445 		return 0;
7446 	}
7447 
7448 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7449 					  16384, 0x03FFFFFF, 0, 128, 5,
7450 					  &fb_div, &vclk_div, &dclk_div);
7451 	if (r)
7452 		return r;
7453 
7454 	/* set RESET_ANTI_MUX to 0 */
7455 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7456 
7457 	/* set VCO_MODE to 1 */
7458 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7459 
7460 	/* disable sleep mode */
7461 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7462 
7463 	/* deassert UPLL_RESET */
7464 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7465 
7466 	mdelay(1);
7467 
7468 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7469 	if (r)
7470 		return r;
7471 
7472 	/* assert UPLL_RESET again */
7473 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7474 
7475 	/* disable spread spectrum. */
7476 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7477 
7478 	/* set feedback divider */
7479 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7480 
7481 	/* set ref divider to 0 */
7482 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7483 
7484 	if (fb_div < 307200)
7485 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7486 	else
7487 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7488 
7489 	/* set PDIV_A and PDIV_B */
7490 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7491 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7492 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7493 
7494 	/* give the PLL some time to settle */
7495 	mdelay(15);
7496 
7497 	/* deassert PLL_RESET */
7498 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7499 
7500 	mdelay(15);
7501 
7502 	/* switch from bypass mode to normal mode */
7503 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7504 
7505 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7506 	if (r)
7507 		return r;
7508 
7509 	/* switch VCLK and DCLK selection */
7510 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7511 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7512 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7513 
7514 	mdelay(100);
7515 
7516 	return 0;
7517 }
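
/*
 * The sequence above follows the usual PLL bring-up shape: park the outputs
 * on the bypass clock, program the feedback and post dividers, let the VCO
 * settle, then switch back. Under the conventional integer-N PLL model
 * (an assumption here; the UPLL_FB_DIV field programmed above additionally
 * carries a fractional scaling, per the 16384 factor passed to
 * radeon_uvd_calc_upll_dividers()), the output relates to the reference as:
 */
static inline unsigned int pll_output_khz(unsigned int ref_khz,
					  unsigned int fb_div,
					  unsigned int post_div)
{
	/* f_vco = f_ref * N (feedback); f_out = f_vco / P (post divider) */
	unsigned long long vco_khz = (unsigned long long)ref_khz * fb_div;

	return (unsigned int)(vco_khz / post_div);
}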
7518 
7519 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7520 {
7521 	struct pci_dev *root = rdev->pdev->bus->self;
7522 	int bridge_pos, gpu_pos;
7523 	u32 speed_cntl, mask, current_data_rate;
7524 	int ret, i;
7525 	u16 tmp16;
7526 
7527 #if 0
7528 	if (pci_is_root_bus(rdev->pdev->bus))
7529 		return;
7530 #endif
7531 
7532 	if (radeon_pcie_gen2 == 0)
7533 		return;
7534 
7535 	if (rdev->flags & RADEON_IS_IGP)
7536 		return;
7537 
7538 	if (!(rdev->flags & RADEON_IS_PCIE))
7539 		return;
7540 
7541 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7542 	if (ret != 0)
7543 		return;
7544 
7545 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7546 		return;
7547 
7548 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7549 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7550 		LC_CURRENT_DATA_RATE_SHIFT;
7551 	if (mask & DRM_PCIE_SPEED_80) {
7552 		if (current_data_rate == 2) {
7553 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7554 			return;
7555 		}
7556 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7557 	} else if (mask & DRM_PCIE_SPEED_50) {
7558 		if (current_data_rate == 1) {
7559 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7560 			return;
7561 		}
7562 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7563 	}
7564 
7565 	bridge_pos = pci_pcie_cap(root);
7566 	if (!bridge_pos)
7567 		return;
7568 
7569 	gpu_pos = pci_pcie_cap(rdev->pdev);
7570 	if (!gpu_pos)
7571 		return;
7572 
7573 	if (mask & DRM_PCIE_SPEED_80) {
7574 		/* re-try equalization if gen3 is not already enabled */
7575 		if (current_data_rate != 2) {
7576 			u16 bridge_cfg, gpu_cfg;
7577 			u16 bridge_cfg2, gpu_cfg2;
7578 			u32 max_lw, current_lw, tmp;
7579 
7580 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7581 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7582 
7583 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7584 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7585 
7586 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7587 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7588 
7589 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7590 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7591 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7592 
7593 			if (current_lw < max_lw) {
7594 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7595 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7596 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7597 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7598 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7599 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7600 				}
7601 			}
7602 
7603 			for (i = 0; i < 10; i++) {
7604 				/* check status */
7605 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7606 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7607 					break;
7608 
7609 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7610 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7611 
7612 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7613 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7614 
7615 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7616 				tmp |= LC_SET_QUIESCE;
7617 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7618 
7619 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7620 				tmp |= LC_REDO_EQ;
7621 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7622 
7623 				mdelay(100);
7624 
7625 				/* linkctl */
7626 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7627 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7628 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7629 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7630 
7631 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7632 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7633 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7634 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7635 
7636 				/* linkctl2 */
7637 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7638 				tmp16 &= ~((1 << 4) | (7 << 9));
7639 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7640 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7641 
7642 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7643 				tmp16 &= ~((1 << 4) | (7 << 9));
7644 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7645 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7646 
7647 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7648 				tmp &= ~LC_SET_QUIESCE;
7649 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7650 			}
7651 		}
7652 	}
7653 
7654 	/* set the link speed */
7655 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7656 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7657 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7658 
7659 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7660 	tmp16 &= ~0xf;
7661 	if (mask & DRM_PCIE_SPEED_80)
7662 		tmp16 |= 3; /* gen3 */
7663 	else if (mask & DRM_PCIE_SPEED_50)
7664 		tmp16 |= 2; /* gen2 */
7665 	else
7666 		tmp16 |= 1; /* gen1 */
7667 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7668 
7669 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7670 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7671 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7672 
7673 	for (i = 0; i < rdev->usec_timeout; i++) {
7674 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7675 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7676 			break;
7677 		udelay(1);
7678 	}
7679 }
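
/*
 * The LNKCTL2 values written above follow the PCIe Target Link Speed
 * encoding: 1 = 2.5 GT/s (gen1), 2 = 5.0 GT/s (gen2), 3 = 8.0 GT/s (gen3).
 * A small sketch of that mapping (helper name hypothetical):
 */
static inline unsigned int pcie_gen_to_gt_x10(unsigned int target_speed)
{
	/* returns the link rate in tenths of GT/s for gen 1-3 */
	switch (target_speed) {
	case 1: return 25;	/* 2.5 GT/s */
	case 2: return 50;	/* 5.0 GT/s */
	case 3: return 80;	/* 8.0 GT/s */
	default: return 0;
	}
}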
7680 
7681 static void si_program_aspm(struct radeon_device *rdev)
7682 {
7683 	u32 data, orig;
7684 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7685 #if 0
7686 	bool disable_clkreq = false;
7687 #endif
7688 
7689 	if (radeon_aspm == 0)
7690 		return;
7691 
7692 	if (!(rdev->flags & RADEON_IS_PCIE))
7693 		return;
7694 
7695 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7696 	data &= ~LC_XMIT_N_FTS_MASK;
7697 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7698 	if (orig != data)
7699 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7700 
7701 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7702 	data |= LC_GO_TO_RECOVERY;
7703 	if (orig != data)
7704 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7705 
7706 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7707 	data |= P_IGNORE_EDB_ERR;
7708 	if (orig != data)
7709 		WREG32_PCIE(PCIE_P_CNTL, data);
7710 
7711 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7712 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7713 	data |= LC_PMI_TO_L1_DIS;
7714 	if (!disable_l0s)
7715 		data |= LC_L0S_INACTIVITY(7);
7716 
7717 	if (!disable_l1) {
7718 		data |= LC_L1_INACTIVITY(7);
7719 		data &= ~LC_PMI_TO_L1_DIS;
7720 		if (orig != data)
7721 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7722 
7723 		if (!disable_plloff_in_l1) {
7724 			bool clk_req_support;
7725 
7726 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7727 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7728 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7729 			if (orig != data)
7730 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7731 
7732 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7733 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7734 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7735 			if (orig != data)
7736 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7737 
7738 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7739 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7740 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7741 			if (orig != data)
7742 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7743 
7744 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7745 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7746 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7747 			if (orig != data)
7748 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7749 
7750 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7751 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7752 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7753 				if (orig != data)
7754 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7755 
7756 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7757 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7758 				if (orig != data)
7759 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7760 
7761 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7762 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7763 				if (orig != data)
7764 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7765 
7766 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7767 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7768 				if (orig != data)
7769 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7770 
7771 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7772 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7773 				if (orig != data)
7774 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7775 
7776 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7777 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7778 				if (orig != data)
7779 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7780 
7781 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7782 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7783 				if (orig != data)
7784 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7785 
7786 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7787 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7788 				if (orig != data)
7789 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7790 			}
7791 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7792 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7793 			data |= LC_DYN_LANES_PWR_STATE(3);
7794 			if (orig != data)
7795 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7796 
7797 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7798 			data &= ~LS2_EXIT_TIME_MASK;
7799 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7800 				data |= LS2_EXIT_TIME(5);
7801 			if (orig != data)
7802 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7803 
7804 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7805 			data &= ~LS2_EXIT_TIME_MASK;
7806 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7807 				data |= LS2_EXIT_TIME(5);
7808 			if (orig != data)
7809 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7810 
7811 #ifdef zMN_TODO
7812 			if (!disable_clkreq &&
7813 			    !pci_is_root_bus(rdev->pdev->bus)) {
7814 				struct pci_dev *root = rdev->pdev->bus->self;
7815 				u32 lnkcap;
7816 
7817 				clk_req_support = false;
7818 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7819 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7820 					clk_req_support = true;
7821 			} else {
7822 				clk_req_support = false;
7823 			}
7824 #else
7825 			clk_req_support = false;
7826 #endif
7827 
7828 			if (clk_req_support) {
7829 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7830 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7831 				if (orig != data)
7832 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7833 
7834 				orig = data = RREG32(THM_CLK_CNTL);
7835 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7836 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7837 				if (orig != data)
7838 					WREG32(THM_CLK_CNTL, data);
7839 
7840 				orig = data = RREG32(MISC_CLK_CNTL);
7841 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7842 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7843 				if (orig != data)
7844 					WREG32(MISC_CLK_CNTL, data);
7845 
7846 				orig = data = RREG32(CG_CLKPIN_CNTL);
7847 				data &= ~BCLK_AS_XCLK;
7848 				if (orig != data)
7849 					WREG32(CG_CLKPIN_CNTL, data);
7850 
7851 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7852 				data &= ~FORCE_BIF_REFCLK_EN;
7853 				if (orig != data)
7854 					WREG32(CG_CLKPIN_CNTL_2, data);
7855 
7856 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7857 				data &= ~MPLL_CLKOUT_SEL_MASK;
7858 				data |= MPLL_CLKOUT_SEL(4);
7859 				if (orig != data)
7860 					WREG32(MPLL_BYPASSCLK_SEL, data);
7861 
7862 				orig = data = RREG32(SPLL_CNTL_MODE);
7863 				data &= ~SPLL_REFCLK_SEL_MASK;
7864 				if (orig != data)
7865 					WREG32(SPLL_CNTL_MODE, data);
7866 			}
7867 		}
7868 	} else {
7869 		if (orig != data)
7870 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7871 	}
7872 
7873 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7874 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7875 	if (orig != data)
7876 		WREG32_PCIE(PCIE_CNTL2, data);
7877 
7878 	if (!disable_l0s) {
7879 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7880 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7881 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7882 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7883 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7884 				data &= ~LC_L0S_INACTIVITY_MASK;
7885 				if (orig != data)
7886 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7887 			}
7888 		}
7889 	}
7890 }
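
/*
 * si_program_aspm() repeats a single idiom throughout: read-modify-write
 * with a write-avoidance check, so registers that already hold the desired
 * value are not rewritten. The pattern in isolation, for the PCIE port
 * registers (helper name hypothetical):
 */
static inline void si_rmw_pcie_port(struct radeon_device *rdev, u32 reg,
				    u32 clear_mask, u32 set_bits)
{
	u32 orig, data;

	orig = data = RREG32_PCIE_PORT(reg);
	data &= ~clear_mask;
	data |= set_bits;
	if (orig != data)
		WREG32_PCIE_PORT(reg, data);
}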
7891 
7892 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7893 {
7894 	unsigned i;
7895 
7896 	/* make sure VCEPLL_CTLREQ is deasserted */
7897 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7898 
7899 	mdelay(10);
7900 
7901 	/* assert UPLL_CTLREQ */
7902 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7903 
7904 	/* wait for CTLACK and CTLACK2 to get asserted */
7905 	for (i = 0; i < 100; ++i) {
7906 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7907 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7908 			break;
7909 		mdelay(10);
7910 	}
7911 
7912 	/* deassert UPLL_CTLREQ */
7913 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7914 
7915 	if (i == 100) {
7916 		DRM_ERROR("Timeout setting VCE clocks!\n");
7917 		return -ETIMEDOUT;
7918 	}
7919 
7920 	return 0;
7921 }
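
/*
 * si_vce_send_vcepll_ctlreq() above is a request/acknowledge handshake with
 * bounded polling: assert the request, poll for both ack bits with a fixed
 * retry budget, deassert the request, and report -ETIMEDOUT if the budget
 * ran out. The control flow in isolation (names hypothetical):
 */
static inline int si_poll_ack_bounded(u32 (*read_status)(void), u32 ack_mask,
				      unsigned int retries)
{
	unsigned int i;

	for (i = 0; i < retries; i++) {
		if ((read_status() & ack_mask) == ack_mask)
			return 0;	/* all ack bits asserted */
		mdelay(10);
	}
	return -ETIMEDOUT;
}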
7922 
7923 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7924 {
7925 	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7926 	int r;
7927 
7928 	/* bypass evclk and ecclk with bclk */
7929 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7930 		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7931 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7932 
7933 	/* put PLL in bypass mode */
7934 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7935 		     ~VCEPLL_BYPASS_EN_MASK);
7936 
7937 	if (!evclk || !ecclk) {
7938 		/* keep the Bypass mode, put PLL to sleep */
7939 		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7940 			     ~VCEPLL_SLEEP_MASK);
7941 		return 0;
7942 	}
7943 
7944 	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7945 					  16384, 0x03FFFFFF, 0, 128, 5,
7946 					  &fb_div, &evclk_div, &ecclk_div);
7947 	if (r)
7948 		return r;
7949 
7950 	/* set RESET_ANTI_MUX to 0 */
7951 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7952 
7953 	/* set VCO_MODE to 1 */
7954 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7955 		     ~VCEPLL_VCO_MODE_MASK);
7956 
7957 	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7958 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7959 		     ~VCEPLL_SLEEP_MASK);
7960 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7961 
7962 	/* deassert VCEPLL_RESET */
7963 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7964 
7965 	mdelay(1);
7966 
7967 	r = si_vce_send_vcepll_ctlreq(rdev);
7968 	if (r)
7969 		return r;
7970 
7971 	/* assert VCEPLL_RESET again */
7972 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7973 
7974 	/* disable spread spectrum. */
7975 	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7976 
7977 	/* set feedback divider */
7978 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7979 
7980 	/* set ref divider to 0 */
7981 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7982 
7983 	/* set PDIV_A and PDIV_B */
7984 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7985 		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7986 		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7987 
7988 	/* give the PLL some time to settle */
7989 	mdelay(15);
7990 
7991 	/* deassert PLL_RESET */
7992 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7993 
7994 	mdelay(15);
7995 
7996 	/* switch from bypass mode to normal mode */
7997 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7998 
7999 	r = si_vce_send_vcepll_ctlreq(rdev);
8000 	if (r)
8001 		return r;
8002 
8003 	/* switch EVCLK and ECCLK selection */
8004 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
8005 		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
8006 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
8007 
8008 	mdelay(100);
8009 
8010 	return 0;
8011 }
8012