/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include <drm/radeon_drm.h>
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"
#include "clearstate_si.h"
#include "radeon_ucode.h"

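/*
 * Each ASIC ships under two firmware naming schemes: the legacy
 * UPPERCASE images (raw blobs, sanity-checked only by size) and the
 * newer lowercase images (carrying radeon_ucode headers).
 * si_init_microcode() tries the lowercase name first and falls back
 * to the legacy one.
 */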
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");

MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
MODULE_FIRMWARE("radeon/tahiti_me.bin");
MODULE_FIRMWARE("radeon/tahiti_ce.bin");
MODULE_FIRMWARE("radeon/tahiti_mc.bin");
MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
MODULE_FIRMWARE("radeon/tahiti_smc.bin");

MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");

MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
MODULE_FIRMWARE("radeon/pitcairn_me.bin");
MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
MODULE_FIRMWARE("radeon/pitcairn_smc.bin");

MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");

MODULE_FIRMWARE("radeon/verde_pfp.bin");
MODULE_FIRMWARE("radeon/verde_me.bin");
MODULE_FIRMWARE("radeon/verde_ce.bin");
MODULE_FIRMWARE("radeon/verde_mc.bin");
MODULE_FIRMWARE("radeon/verde_rlc.bin");
MODULE_FIRMWARE("radeon/verde_smc.bin");

MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");

MODULE_FIRMWARE("radeon/oland_pfp.bin");
MODULE_FIRMWARE("radeon/oland_me.bin");
MODULE_FIRMWARE("radeon/oland_ce.bin");
MODULE_FIRMWARE("radeon/oland_mc.bin");
MODULE_FIRMWARE("radeon/oland_rlc.bin");
MODULE_FIRMWARE("radeon/oland_smc.bin");

MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

MODULE_FIRMWARE("radeon/hainan_pfp.bin");
MODULE_FIRMWARE("radeon/hainan_me.bin");
MODULE_FIRMWARE("radeon/hainan_ce.bin");
MODULE_FIRMWARE("radeon/hainan_mc.bin");
MODULE_FIRMWARE("radeon/hainan_rlc.bin");
MODULE_FIRMWARE("radeon/hainan_smc.bin");

static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);

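/*
 * RLC save/restore list for Verde.  The driver copies this table
 * verbatim into the RLC save/restore buffer; the layout (inferred from
 * how the entries are packed, not from documentation) appears to be a
 * descriptor with a GRBM_GFX_INDEX-style SE/SH select in the upper 16
 * bits and a dword register offset (byte offset >> 2) in the lower 16
 * bits, each followed by a slot the RLC fills in with the saved value.
 */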
static const u32 verde_rlc_save_restore_register_list[] =
{
	(0x8000 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x98f0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xe7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9150 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x897c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8d8c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac54 >> 2),
	0x00000000,
	0x3,
	(0x9c00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9910 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9914 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9918 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x991c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9920 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9924 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9928 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x992c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9930 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9934 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9938 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x993c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9940 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9944 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9948 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x994c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9950 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9954 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9958 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x995c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9960 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9964 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9968 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x996c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9970 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9974 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9978 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x997c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9980 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9984 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9988 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x998c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c08 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9060 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9364 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x913c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e50 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e58 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e5c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9508 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x950c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9494 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88cc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x89b0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9830 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9838 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9a10 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9874 >> 2),
	0x00000000,
	0x00000000
};

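/*
 * The *_golden_registers and *_mgcg_cgcg_init tables below are flat
 * arrays of {reg, and_mask, or_mask} triplets consumed by
 * radeon_program_register_sequence().  A minimal sketch of how that
 * helper applies one triplet (see radeon_device.c for the real code):
 *
 *	if (and_mask == 0xffffffff)
 *		tmp = or_mask;
 *	else
 *		tmp = (RREG32(reg) & ~and_mask) | (or_mask & and_mask);
 *	WREG32(reg, tmp);
 */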
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};

static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};

static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};

static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};

static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};

static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};

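/*
 * MGCG/CGCG = medium- and coarse-grain clock gating.  These tables
 * pre-load the clock-gating registers; gating itself is toggled later
 * (see si_init_cg(), declared above).
 */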
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

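/*
 * Power-gating init sequence for Verde, in the same {reg, mask, value}
 * triplet format; despite the name it is applied with the other Verde
 * tables in si_init_golden_registers() below.
 */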
static const u32 verde_pg_init[] =
{
	0x353c, 0xffffffff, 0x40000,
	0x3538, 0xffffffff, 0x200010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x7007,
	0x3538, 0xffffffff, 0x300010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x400000,
	0x3538, 0xffffffff, 0x100010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x120200,
	0x3538, 0xffffffff, 0x500010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x1e1e16,
	0x3538, 0xffffffff, 0x600010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x171f1e,
	0x3538, 0xffffffff, 0x700010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x3538, 0xffffffff, 0x9ff,
	0x3500, 0xffffffff, 0x0,
	0x3504, 0xffffffff, 0x10000800,
	0x3504, 0xffffffff, 0xf,
	0x3504, 0xffffffff, 0xf,
	0x3500, 0xffffffff, 0x4,
	0x3504, 0xffffffff, 0x1000051e,
	0x3504, 0xffffffff, 0xffff,
	0x3504, 0xffffffff, 0xffff,
	0x3500, 0xffffffff, 0x8,
	0x3504, 0xffffffff, 0x80500,
	0x3500, 0xffffffff, 0x12,
	0x3504, 0xffffffff, 0x9050c,
	0x3500, 0xffffffff, 0x1d,
	0x3504, 0xffffffff, 0xb052c,
	0x3500, 0xffffffff, 0x2a,
	0x3504, 0xffffffff, 0x1053e,
	0x3500, 0xffffffff, 0x2d,
	0x3504, 0xffffffff, 0x10546,
	0x3500, 0xffffffff, 0x30,
	0x3504, 0xffffffff, 0xa054e,
	0x3500, 0xffffffff, 0x3c,
	0x3504, 0xffffffff, 0x1055f,
	0x3500, 0xffffffff, 0x3f,
	0x3504, 0xffffffff, 0x10567,
	0x3500, 0xffffffff, 0x42,
	0x3504, 0xffffffff, 0x1056f,
	0x3500, 0xffffffff, 0x45,
	0x3504, 0xffffffff, 0x10572,
	0x3500, 0xffffffff, 0x48,
	0x3504, 0xffffffff, 0x20575,
	0x3500, 0xffffffff, 0x4c,
	0x3504, 0xffffffff, 0x190801,
	0x3500, 0xffffffff, 0x67,
	0x3504, 0xffffffff, 0x1082a,
	0x3500, 0xffffffff, 0x6a,
	0x3504, 0xffffffff, 0x1b082d,
	0x3500, 0xffffffff, 0x87,
	0x3504, 0xffffffff, 0x310851,
	0x3500, 0xffffffff, 0xba,
	0x3504, 0xffffffff, 0x891,
	0x3500, 0xffffffff, 0xbc,
	0x3504, 0xffffffff, 0x893,
	0x3500, 0xffffffff, 0xbe,
	0x3504, 0xffffffff, 0x20895,
	0x3500, 0xffffffff, 0xc2,
	0x3504, 0xffffffff, 0x20899,
	0x3500, 0xffffffff, 0xc6,
	0x3504, 0xffffffff, 0x2089d,
	0x3500, 0xffffffff, 0xca,
	0x3504, 0xffffffff, 0x8a1,
	0x3500, 0xffffffff, 0xcc,
	0x3504, 0xffffffff, 0x8a3,
	0x3500, 0xffffffff, 0xce,
	0x3504, 0xffffffff, 0x308a5,
	0x3500, 0xffffffff, 0xd3,
	0x3504, 0xffffffff, 0x6d08cd,
	0x3500, 0xffffffff, 0x142,
	0x3504, 0xffffffff, 0x2000095a,
	0x3504, 0xffffffff, 0x1,
	0x3500, 0xffffffff, 0x144,
	0x3504, 0xffffffff, 0x301f095b,
	0x3500, 0xffffffff, 0x165,
	0x3504, 0xffffffff, 0xc094d,
	0x3500, 0xffffffff, 0x173,
	0x3504, 0xffffffff, 0xf096d,
	0x3500, 0xffffffff, 0x184,
	0x3504, 0xffffffff, 0x15097f,
	0x3500, 0xffffffff, 0x19b,
	0x3504, 0xffffffff, 0xc0998,
	0x3500, 0xffffffff, 0x1a9,
	0x3504, 0xffffffff, 0x409a7,
	0x3500, 0xffffffff, 0x1af,
	0x3504, 0xffffffff, 0xcdc,
	0x3500, 0xffffffff, 0x1b1,
	0x3504, 0xffffffff, 0x800,
	0x3508, 0xffffffff, 0x6c9b2000,
	0x3510, 0xfc00, 0x2000,
	0x3544, 0xffffffff, 0xfc0,
	0x28d4, 0x00000100, 0x100
};

static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}

/**
 * si_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 */
int si_get_allowed_info_register(struct radeon_device *rdev,
				 u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
	case UVD_STATUS:
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

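/*
 * Clock values in this driver follow the ATOM convention of 10 kHz
 * units, so PCIE_BUS_CLK below is 100 MHz and TCLK is 10 MHz.
 */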
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)

/**
 * si_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (SI).
 */
u32 si_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;
	u32 tmp;

	tmp = RREG32(CG_CLKPIN_CNTL_2);
	if (tmp & MUX_TCLK_TO_XCLK)
		return TCLK;

	tmp = RREG32(CG_CLKPIN_CNTL);
	if (tmp & XTALIN_DIVIDE)
		return reference_clock / 4;

	return reference_clock;
}

/* get temperature in millidegrees */
int si_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

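	/*
	 * CTF_TEMP is a 9-bit value: if bit 0x200 is set the reading has
	 * saturated, so clamp to 255 C; otherwise use the low 9 bits.
	 */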
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = (actual_temp * 1000);

	return actual_temp;
}

#define TAHITI_IO_MC_REGS_SIZE 36

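/*
 * Legacy MC firmware setup: per-ASIC tables of {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pairs (all sized TAHITI_IO_MC_REGS_SIZE),
 * written before the MC ucode itself in si_mc_load_microcode().
 */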
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};

static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};

static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};

static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};

static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};

/* ucode loading */
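/*
 * Load sequence (both firmware layouts): halt the MC sequencer, write
 * the IO debug index/data pairs, stream the ucode words into
 * MC_SEQ_SUP_PGM, restart the engine, then poll
 * MC_SEQ_TRAIN_WAKEUP_CNTL until both memory channels report training
 * done.
 */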
int si_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		ucode_size = rdev->mc_fw->datasize / 4;

		switch (rdev->family) {
		case CHIP_TAHITI:
			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_PITCAIRN:
			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_VERDE:
		default:
			io_mc_regs = (u32 *)&verde_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_OLAND:
			io_mc_regs = (u32 *)&oland_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAINAN:
			io_mc_regs = (u32 *)&hainan_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}
		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}
1635 
1636 static int si_init_microcode(struct radeon_device *rdev)
1637 {
1638 	const char *chip_name;
1639 	const char *new_chip_name;
1640 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1641 	size_t smc_req_size, mc2_req_size;
1642 	char fw_name[30];
1643 	int err;
1644 	int new_fw = 0;
1645 
1646 	DRM_DEBUG("\n");
1647 
1648 	switch (rdev->family) {
1649 	case CHIP_TAHITI:
1650 		chip_name = "TAHITI";
1651 		new_chip_name = "tahiti";
1652 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1653 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1654 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1655 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1656 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1657 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1658 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1659 		break;
1660 	case CHIP_PITCAIRN:
1661 		chip_name = "PITCAIRN";
1662 		new_chip_name = "pitcairn";
1663 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1664 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1665 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1666 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1667 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1668 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1669 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1670 		break;
1671 	case CHIP_VERDE:
1672 		chip_name = "VERDE";
1673 		new_chip_name = "verde";
1674 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1675 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1676 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1677 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1678 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1679 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1680 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1681 		break;
1682 	case CHIP_OLAND:
1683 		chip_name = "OLAND";
1684 		new_chip_name = "oland";
1685 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1686 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1687 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1688 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1689 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1690 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1691 		break;
1692 	case CHIP_HAINAN:
1693 		chip_name = "HAINAN";
1694 		new_chip_name = "hainan";
1695 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1696 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1697 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1698 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1699 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1700 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1701 		break;
1702 	default:
		BUG();
1703 	}
1704 
1705 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1706 
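	/*
	 * For each block we try the new-style (lowercase) image first and
	 * fall back to the legacy (uppercase) image on failure; new_fw
	 * counts the new-style hits so the two schemes are never mixed.
	 */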
1707 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
1708 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1709 	if (err) {
1710 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1711 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1712 		if (err)
1713 			goto out;
1714 		if (rdev->pfp_fw->datasize != pfp_req_size) {
1715 			printk(KERN_ERR
1716 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1717 			       rdev->pfp_fw->datasize, fw_name);
1718 			err = -EINVAL;
1719 			goto out;
1720 		}
1721 	} else {
1722 		err = radeon_ucode_validate(rdev->pfp_fw);
1723 		if (err) {
1724 			printk(KERN_ERR
1725 			       "si_cp: validation failed for firmware \"%s\"\n",
1726 			       fw_name);
1727 			goto out;
1728 		} else {
1729 			new_fw++;
1730 		}
1731 	}
1732 
1733 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
1734 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1735 	if (err) {
1736 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
1737 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1738 		if (err)
1739 			goto out;
1740 		if (rdev->me_fw->datasize != me_req_size) {
1741 			printk(KERN_ERR
1742 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1743 			       rdev->me_fw->datasize, fw_name);
1744 			err = -EINVAL;
1745 		}
1746 	} else {
1747 		err = radeon_ucode_validate(rdev->me_fw);
1748 		if (err) {
1749 			printk(KERN_ERR
1750 			       "si_cp: validation failed for firmware \"%s\"\n",
1751 			       fw_name);
1752 			goto out;
1753 		} else {
1754 			new_fw++;
1755 		}
1756 	}
1757 
1758 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
1759 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1760 	if (err) {
1761 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
1762 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1763 		if (err)
1764 			goto out;
1765 		if (rdev->ce_fw->datasize != ce_req_size) {
1766 			printk(KERN_ERR
1767 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1768 			       rdev->ce_fw->datasize, fw_name);
1769 			err = -EINVAL;
1770 		}
1771 	} else {
1772 		err = radeon_ucode_validate(rdev->ce_fw);
1773 		if (err) {
1774 			printk(KERN_ERR
1775 			       "si_cp: validation failed for firmware \"%s\"\n",
1776 			       fw_name);
1777 			goto out;
1778 		} else {
1779 			new_fw++;
1780 		}
1781 	}
1782 
1783 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
1784 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1785 	if (err) {
1786 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
1787 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1788 		if (err)
1789 			goto out;
1790 		if (rdev->rlc_fw->datasize != rlc_req_size) {
1791 			printk(KERN_ERR
1792 			       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1793 			       rdev->rlc_fw->datasize, fw_name);
1794 			err = -EINVAL;
1795 		}
1796 	} else {
1797 		err = radeon_ucode_validate(rdev->rlc_fw);
1798 		if (err) {
1799 			printk(KERN_ERR
1800 			       "si_cp: validation failed for firmware \"%s\"\n",
1801 			       fw_name);
1802 			goto out;
1803 		} else {
1804 			new_fw++;
1805 		}
1806 	}
1807 
1808 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
1809 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1810 	if (err) {
1811 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
1812 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1813 		if (err) {
1814 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
1815 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1816 			if (err)
1817 				goto out;
1818 		}
1819 		if ((rdev->mc_fw->datasize != mc_req_size) &&
1820 		    (rdev->mc_fw->datasize != mc2_req_size)) {
1821 			printk(KERN_ERR
1822 			       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1823 			       rdev->mc_fw->datasize, fw_name);
1824 			err = -EINVAL;
1825 		}
1826 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
1827 	} else {
1828 		err = radeon_ucode_validate(rdev->mc_fw);
1829 		if (err) {
1830 			printk(KERN_ERR
1831 			       "si_cp: validation failed for firmware \"%s\"\n",
1832 			       fw_name);
1833 			goto out;
1834 		} else {
1835 			new_fw++;
1836 		}
1837 	}
1838 
1839 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
1840 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1841 	if (err) {
1842 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
1843 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1844 		if (err) {
1845 			printk(KERN_ERR
1846 			       "smc: error loading firmware \"%s\"\n",
1847 			       fw_name);
1848 			release_firmware(rdev->smc_fw);
1849 			rdev->smc_fw = NULL;
1850 			err = 0;
1851 		} else if (rdev->smc_fw->datasize != smc_req_size) {
1852 			printk(KERN_ERR
1853 			       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1854 			       rdev->smc_fw->datasize, fw_name);
1855 			err = -EINVAL;
1856 		}
1857 	} else {
1858 		err = radeon_ucode_validate(rdev->smc_fw);
1859 		if (err) {
1860 			printk(KERN_ERR
1861 			       "si_cp: validation failed for firmware \"%s\"\n",
1862 			       fw_name);
1863 			goto out;
1864 		} else {
1865 			new_fw++;
1866 		}
1867 	}
1868 
1869 	if (new_fw == 0) {
1870 		rdev->new_fw = false;
1871 	} else if (new_fw < 6) {
1872 		printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1873 		err = -EINVAL;
1874 	} else {
1875 		rdev->new_fw = true;
1876 	}
1877 out:
1878 	if (err) {
1879 		if (err != -EINVAL)
1880 			printk(KERN_ERR
1881 			       "si_cp: Failed to load firmware \"%s\"\n",
1882 			       fw_name);
1883 		release_firmware(rdev->pfp_fw);
1884 		rdev->pfp_fw = NULL;
1885 		release_firmware(rdev->me_fw);
1886 		rdev->me_fw = NULL;
1887 		release_firmware(rdev->ce_fw);
1888 		rdev->ce_fw = NULL;
1889 		release_firmware(rdev->rlc_fw);
1890 		rdev->rlc_fw = NULL;
1891 		release_firmware(rdev->mc_fw);
1892 		rdev->mc_fw = NULL;
1893 		release_firmware(rdev->smc_fw);
1894 		rdev->smc_fw = NULL;
1895 	}
1896 	return err;
1897 }
1898 
1899 /**
1900  * si_fini_microcode - drop the firmware image references
1901  *
1902  * @rdev: radeon_device pointer
1903  *
1904  * Drop the pfp, me, rlc, mc, ce and smc firmware image references.
1905  * Called at driver shutdown.
1906  */
1907 static void si_fini_microcode(struct radeon_device *rdev)
1908 {
1909 	release_firmware(rdev->pfp_fw);
1910 	rdev->pfp_fw = NULL;
1911 	release_firmware(rdev->me_fw);
1912 	rdev->me_fw = NULL;
1913 	release_firmware(rdev->rlc_fw);
1914 	rdev->rlc_fw = NULL;
1915 	release_firmware(rdev->mc_fw);
1916 	rdev->mc_fw = NULL;
1917 	release_firmware(rdev->smc_fw);
1918 	rdev->smc_fw = NULL;
1919 	release_firmware(rdev->ce_fw);
1920 	rdev->ce_fw = NULL;
1921 }
1922 
1923 /* watermark setup */
1924 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1925 				   struct radeon_crtc *radeon_crtc,
1926 				   struct drm_display_mode *mode,
1927 				   struct drm_display_mode *other_mode)
1928 {
1929 	u32 tmp, buffer_alloc, i;
1930 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1931 	/*
1932 	 * Line Buffer Setup
1933 	 * There are 3 line buffers, each one shared by 2 display controllers.
1934 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1935 	 * the display controllers.  The partitioning is done via one of four
1936 	 * preset allocations specified in bits 21:20 (only two are used here):
1937 	 *  0 - half lb
1938 	 *  2 - whole lb, other crtc must be disabled
1939 	 */
1940 	/* this can get tricky if we have two large displays on a paired group
1941 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1942 	 * non-linked crtcs for maximum line buffer allocation.
1943 	 */
1944 	if (radeon_crtc->base.enabled && mode) {
1945 		if (other_mode) {
1946 			tmp = 0; /* 1/2 */
1947 			buffer_alloc = 1;
1948 		} else {
1949 			tmp = 2; /* whole */
1950 			buffer_alloc = 2;
1951 		}
1952 	} else {
1953 		tmp = 0;
1954 		buffer_alloc = 0;
1955 	}
1956 
1957 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1958 	       DC_LB_MEMORY_CONFIG(tmp));
1959 
1960 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1961 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1962 	for (i = 0; i < rdev->usec_timeout; i++) {
1963 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1964 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1965 			break;
1966 		udelay(1);
1967 	}
1968 
1969 	if (radeon_crtc->base.enabled && mode) {
1970 		switch (tmp) {
1971 		case 0:
1972 		default:
1973 			return 4096 * 2;
1974 		case 2:
1975 			return 8192 * 2;
1976 		}
1977 	}
1978 
1979 	/* controller not enabled, so no lb used */
1980 	return 0;
1981 }
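/* The value returned above is the line-buffer allocation in pixels; e.g. a
 * whole buffer (8192 * 2) ahead of a 1920-wide mode buys roughly 8 lines of
 * scanout lead (cf. lb_vblank_lead_lines in dce6_program_watermarks()). */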
1982 
1983 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1984 {
1985 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1986 
1987 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1988 	case 0:
1989 	default:
1990 		return 1;
1991 	case 1:
1992 		return 2;
1993 	case 2:
1994 		return 4;
1995 	case 3:
1996 		return 8;
1997 	case 4:
1998 		return 3;
1999 	case 5:
2000 		return 6;
2001 	case 6:
2002 		return 10;
2003 	case 7:
2004 		return 12;
2005 	case 8:
2006 		return 16;
2007 	}
2008 }
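/* NOOFCHAN is an enumeration rather than a count, so the mapping above
 * cannot be reduced to a shift; e.g. Tahiti's 384-bit bus (twelve 32-bit
 * channels) reports the encoding 7. */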
2009 
2010 struct dce6_wm_params {
2011 	u32 dram_channels; /* number of dram channels */
2012 	u32 yclk;          /* bandwidth per dram data pin in kHz */
2013 	u32 sclk;          /* engine clock in kHz */
2014 	u32 disp_clk;      /* display clock in kHz */
2015 	u32 src_width;     /* viewport width */
2016 	u32 active_time;   /* active display time in ns */
2017 	u32 blank_time;    /* blank time in ns */
2018 	bool interlaced;    /* mode is interlaced */
2019 	fixed20_12 vsc;    /* vertical scale ratio */
2020 	u32 num_heads;     /* number of active crtcs */
2021 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2022 	u32 lb_size;       /* line buffer allocated to pipe */
2023 	u32 vtaps;         /* vertical scaler taps */
2024 };
2025 
2026 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2027 {
2028 	/* Calculate raw DRAM Bandwidth */
2029 	fixed20_12 dram_efficiency; /* 0.7 */
2030 	fixed20_12 yclk, dram_channels, bandwidth;
2031 	fixed20_12 a;
2032 
2033 	a.full = dfixed_const(1000);
2034 	yclk.full = dfixed_const(wm->yclk);
2035 	yclk.full = dfixed_div(yclk, a);
2036 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2037 	a.full = dfixed_const(10);
2038 	dram_efficiency.full = dfixed_const(7);
2039 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2040 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2041 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2042 
2043 	return dfixed_trunc(bandwidth);
2044 }
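/* Illustrative numbers (not from any part's datasheet): with
 * yclk = 4,000,000 kHz (4 Gbps effective per pin) and 8 channels this is
 * 8 * 4 bytes * 4000 MHz * 0.7 efficiency ~= 89,600 MB/s of raw bandwidth. */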
2045 
2046 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2047 {
2048 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2049 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2050 	fixed20_12 yclk, dram_channels, bandwidth;
2051 	fixed20_12 a;
2052 
2053 	a.full = dfixed_const(1000);
2054 	yclk.full = dfixed_const(wm->yclk);
2055 	yclk.full = dfixed_div(yclk, a);
2056 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2057 	a.full = dfixed_const(10);
2058 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
2059 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2060 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2061 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2062 
2063 	return dfixed_trunc(bandwidth);
2064 }
2065 
2066 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2067 {
2068 	/* Calculate the display Data return Bandwidth */
2069 	fixed20_12 return_efficiency; /* 0.8 */
2070 	fixed20_12 sclk, bandwidth;
2071 	fixed20_12 a;
2072 
2073 	a.full = dfixed_const(1000);
2074 	sclk.full = dfixed_const(wm->sclk);
2075 	sclk.full = dfixed_div(sclk, a);
2076 	a.full = dfixed_const(10);
2077 	return_efficiency.full = dfixed_const(8);
2078 	return_efficiency.full = dfixed_div(return_efficiency, a);
2079 	a.full = dfixed_const(32);
2080 	bandwidth.full = dfixed_mul(a, sclk);
2081 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2082 
2083 	return dfixed_trunc(bandwidth);
2084 }
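/* The return path moves 32 bytes per sclk cycle at 0.8 efficiency, so a
 * hypothetical 1000 MHz engine clock yields 32 * 1000 * 0.8 = 25,600 MB/s. */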
2085 
2086 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2087 {
2088 	return 32;
2089 }
2090 
2091 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2092 {
2093 	/* Calculate the DMIF Request Bandwidth */
2094 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2095 	fixed20_12 disp_clk, sclk, bandwidth;
2096 	fixed20_12 a, b1, b2;
2097 	u32 min_bandwidth;
2098 
2099 	a.full = dfixed_const(1000);
2100 	disp_clk.full = dfixed_const(wm->disp_clk);
2101 	disp_clk.full = dfixed_div(disp_clk, a);
2102 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2103 	b1.full = dfixed_mul(a, disp_clk);
2104 
2105 	a.full = dfixed_const(1000);
2106 	sclk.full = dfixed_const(wm->sclk);
2107 	sclk.full = dfixed_div(sclk, a);
2108 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2109 	b2.full = dfixed_mul(a, sclk);
2110 
2111 	a.full = dfixed_const(10);
2112 	disp_clk_request_efficiency.full = dfixed_const(8);
2113 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2114 
2115 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2116 
2117 	a.full = dfixed_const(min_bandwidth);
2118 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2119 
2120 	return dfixed_trunc(bandwidth);
2121 }
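/* DMIF requests are bounded by the slower of 16 bytes/dispclk-cycle and
 * 32 bytes/sclk-cycle, derated to 0.8; taking the min keeps the estimate
 * honest when one clock outruns the other. */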
2122 
2123 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2124 {
2125 	/* Calculate the available bandwidth; the display can use it in bursts but not on average. */
2126 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2127 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2128 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2129 
2130 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2131 }
2132 
2133 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2134 {
2135 	/* Calculate the display mode Average Bandwidth
2136 	 * DisplayMode should contain the source and destination dimensions,
2137 	 * timing, etc.
2138 	 */
2139 	fixed20_12 bpp;
2140 	fixed20_12 line_time;
2141 	fixed20_12 src_width;
2142 	fixed20_12 bandwidth;
2143 	fixed20_12 a;
2144 
2145 	a.full = dfixed_const(1000);
2146 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2147 	line_time.full = dfixed_div(line_time, a);
2148 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2149 	src_width.full = dfixed_const(wm->src_width);
2150 	bandwidth.full = dfixed_mul(src_width, bpp);
2151 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2152 	bandwidth.full = dfixed_div(bandwidth, line_time);
2153 
2154 	return dfixed_trunc(bandwidth);
2155 }
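/* Example with assumed 1080p timings: 1920 px * 4 bytes * 1.0 vsc over a
 * ~14.8 us line time is roughly 519 MB/s of average fetch for this head. */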
2156 
2157 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2158 {
2159 	/* First calculate the latency in ns */
2160 	u32 mc_latency = 2000; /* 2000 ns. */
2161 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2162 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2163 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2164 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2165 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2166 		(wm->num_heads * cursor_line_pair_return_time);
2167 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2168 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2169 	u32 tmp, dmif_size = 12288;
2170 	fixed20_12 a, b, c;
2171 
2172 	if (wm->num_heads == 0)
2173 		return 0;
2174 
2175 	a.full = dfixed_const(2);
2176 	b.full = dfixed_const(1);
2177 	if ((wm->vsc.full > a.full) ||
2178 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2179 	    (wm->vtaps >= 5) ||
2180 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2181 		max_src_lines_per_dst_line = 4;
2182 	else
2183 		max_src_lines_per_dst_line = 2;
2184 
2185 	a.full = dfixed_const(available_bandwidth);
2186 	b.full = dfixed_const(wm->num_heads);
2187 	a.full = dfixed_div(a, b);
2188 
2189 	b.full = dfixed_const(mc_latency + 512);
2190 	c.full = dfixed_const(wm->disp_clk);
2191 	b.full = dfixed_div(b, c);
2192 
2193 	c.full = dfixed_const(dmif_size);
2194 	b.full = dfixed_div(c, b);
2195 
2196 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
2197 
2198 	b.full = dfixed_const(1000);
2199 	c.full = dfixed_const(wm->disp_clk);
2200 	b.full = dfixed_div(c, b);
2201 	c.full = dfixed_const(wm->bytes_per_pixel);
2202 	b.full = dfixed_mul(b, c);
2203 
2204 	lb_fill_bw = min(tmp, dfixed_trunc(b));
2205 
2206 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2207 	b.full = dfixed_const(1000);
2208 	c.full = dfixed_const(lb_fill_bw);
2209 	b.full = dfixed_div(c, b);
2210 	a.full = dfixed_div(a, b);
2211 	line_fill_time = dfixed_trunc(a);
2212 
2213 	if (line_fill_time < wm->active_time)
2214 		return latency;
2215 	else
2216 		return latency + (line_fill_time - wm->active_time);
2217 
2218 }
2219 
2220 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2221 {
2222 	if (dce6_average_bandwidth(wm) <=
2223 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2224 		return true;
2225 	else
2226 		return false;
2227 }
2228 
2229 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2230 {
2231 	if (dce6_average_bandwidth(wm) <=
2232 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2233 		return true;
2234 	else
2235 		return false;
2236 }
2237 
2238 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2239 {
2240 	u32 lb_partitions = wm->lb_size / wm->src_width;
2241 	u32 line_time = wm->active_time + wm->blank_time;
2242 	u32 latency_tolerant_lines;
2243 	u32 latency_hiding;
2244 	fixed20_12 a;
2245 
2246 	a.full = dfixed_const(1);
2247 	if (wm->vsc.full > a.full)
2248 		latency_tolerant_lines = 1;
2249 	else {
2250 		if (lb_partitions <= (wm->vtaps + 1))
2251 			latency_tolerant_lines = 1;
2252 		else
2253 			latency_tolerant_lines = 2;
2254 	}
2255 
2256 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2257 
2258 	if (dce6_latency_watermark(wm) <= latency_hiding)
2259 		return true;
2260 	else
2261 		return false;
2262 }
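/* A head can hide latency when the lines it can buffer (plus blank time)
 * cover the computed watermark; heads that fail any of these three checks
 * get their display priority forced high in dce6_program_watermarks(). */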
2263 
2264 static void dce6_program_watermarks(struct radeon_device *rdev,
2265 					 struct radeon_crtc *radeon_crtc,
2266 					 u32 lb_size, u32 num_heads)
2267 {
2268 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2269 	struct dce6_wm_params wm_low, wm_high;
2270 	u32 dram_channels;
2271 	u32 pixel_period;
2272 	u32 line_time = 0;
2273 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2274 	u32 priority_a_mark = 0, priority_b_mark = 0;
2275 	u32 priority_a_cnt = PRIORITY_OFF;
2276 	u32 priority_b_cnt = PRIORITY_OFF;
2277 	u32 tmp, arb_control3;
2278 	fixed20_12 a, b, c;
2279 
2280 	if (radeon_crtc->base.enabled && num_heads && mode) {
2281 		pixel_period = 1000000 / (u32)mode->clock;
2282 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2283 		priority_a_cnt = 0;
2284 		priority_b_cnt = 0;
2285 
2286 		if (rdev->family == CHIP_ARUBA)
2287 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2288 		else
2289 			dram_channels = si_get_number_of_dram_channels(rdev);
2290 
2291 		/* watermark for high clocks */
2292 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2293 			wm_high.yclk =
2294 				radeon_dpm_get_mclk(rdev, false) * 10;
2295 			wm_high.sclk =
2296 				radeon_dpm_get_sclk(rdev, false) * 10;
2297 		} else {
2298 			wm_high.yclk = rdev->pm.current_mclk * 10;
2299 			wm_high.sclk = rdev->pm.current_sclk * 10;
2300 		}
2301 
2302 		wm_high.disp_clk = mode->clock;
2303 		wm_high.src_width = mode->crtc_hdisplay;
2304 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2305 		wm_high.blank_time = line_time - wm_high.active_time;
2306 		wm_high.interlaced = false;
2307 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2308 			wm_high.interlaced = true;
2309 		wm_high.vsc = radeon_crtc->vsc;
2310 		wm_high.vtaps = 1;
2311 		if (radeon_crtc->rmx_type != RMX_OFF)
2312 			wm_high.vtaps = 2;
2313 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2314 		wm_high.lb_size = lb_size;
2315 		wm_high.dram_channels = dram_channels;
2316 		wm_high.num_heads = num_heads;
2317 
2318 		/* watermark for low clocks */
2319 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2320 			wm_low.yclk =
2321 				radeon_dpm_get_mclk(rdev, true) * 10;
2322 			wm_low.sclk =
2323 				radeon_dpm_get_sclk(rdev, true) * 10;
2324 		} else {
2325 			wm_low.yclk = rdev->pm.current_mclk * 10;
2326 			wm_low.sclk = rdev->pm.current_sclk * 10;
2327 		}
2328 
2329 		wm_low.disp_clk = mode->clock;
2330 		wm_low.src_width = mode->crtc_hdisplay;
2331 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2332 		wm_low.blank_time = line_time - wm_low.active_time;
2333 		wm_low.interlaced = false;
2334 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2335 			wm_low.interlaced = true;
2336 		wm_low.vsc = radeon_crtc->vsc;
2337 		wm_low.vtaps = 1;
2338 		if (radeon_crtc->rmx_type != RMX_OFF)
2339 			wm_low.vtaps = 2;
2340 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2341 		wm_low.lb_size = lb_size;
2342 		wm_low.dram_channels = dram_channels;
2343 		wm_low.num_heads = num_heads;
2344 
2345 		/* set for high clocks */
2346 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2347 		/* set for low clocks */
2348 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2349 
2350 		/* possibly force display priority to high */
2351 		/* should really do this at mode validation time... */
2352 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2353 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2354 		    !dce6_check_latency_hiding(&wm_high) ||
2355 		    (rdev->disp_priority == 2)) {
2356 			DRM_DEBUG_KMS("force priority to high\n");
2357 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2358 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2359 		}
2360 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2361 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2362 		    !dce6_check_latency_hiding(&wm_low) ||
2363 		    (rdev->disp_priority == 2)) {
2364 			DRM_DEBUG_KMS("force priority to high\n");
2365 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2366 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2367 		}
2368 
2369 		a.full = dfixed_const(1000);
2370 		b.full = dfixed_const(mode->clock);
2371 		b.full = dfixed_div(b, a);
2372 		c.full = dfixed_const(latency_watermark_a);
2373 		c.full = dfixed_mul(c, b);
2374 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2375 		c.full = dfixed_div(c, a);
2376 		a.full = dfixed_const(16);
2377 		c.full = dfixed_div(c, a);
2378 		priority_a_mark = dfixed_trunc(c);
2379 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2380 
2381 		a.full = dfixed_const(1000);
2382 		b.full = dfixed_const(mode->clock);
2383 		b.full = dfixed_div(b, a);
2384 		c.full = dfixed_const(latency_watermark_b);
2385 		c.full = dfixed_mul(c, b);
2386 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2387 		c.full = dfixed_div(c, a);
2388 		a.full = dfixed_const(16);
2389 		c.full = dfixed_div(c, a);
2390 		priority_b_mark = dfixed_trunc(c);
2391 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
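		/* the mark is (roughly) the number of pixels the pipe consumes
		 * during the worst-case latency, in 16-pixel units:
		 *   mark = latency_ns * pixclk_MHz / 1000 * hsc / 16,
		 * e.g. 4000 ns at an assumed 148.5 MHz with hsc = 1 gives ~37 */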
2392 
2393 		/* Save number of lines the linebuffer leads before the scanout */
2394 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2395 	}
2396 
2397 	/* select wm A */
2398 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2399 	tmp = arb_control3;
2400 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2401 	tmp |= LATENCY_WATERMARK_MASK(1);
2402 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2403 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2404 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2405 		LATENCY_HIGH_WATERMARK(line_time)));
2406 	/* select wm B */
2407 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2408 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2409 	tmp |= LATENCY_WATERMARK_MASK(2);
2410 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2411 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2412 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2413 		LATENCY_HIGH_WATERMARK(line_time)));
2414 	/* restore original selection */
2415 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2416 
2417 	/* write the priority marks */
2418 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2419 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2420 
2421 	/* save values for DPM */
2422 	radeon_crtc->line_time = line_time;
2423 	radeon_crtc->wm_high = latency_watermark_a;
2424 	radeon_crtc->wm_low = latency_watermark_b;
2425 }
2426 
2427 void dce6_bandwidth_update(struct radeon_device *rdev)
2428 {
2429 	struct drm_display_mode *mode0 = NULL;
2430 	struct drm_display_mode *mode1 = NULL;
2431 	u32 num_heads = 0, lb_size;
2432 	int i;
2433 
2434 	if (!rdev->mode_info.mode_config_initialized)
2435 		return;
2436 
2437 	radeon_update_display_priority(rdev);
2438 
2439 	for (i = 0; i < rdev->num_crtc; i++) {
2440 		if (rdev->mode_info.crtcs[i]->base.enabled)
2441 			num_heads++;
2442 	}
2443 	for (i = 0; i < rdev->num_crtc; i += 2) {
2444 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2445 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2446 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2447 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2448 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2449 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2450 	}
2451 }
2452 
2453 /*
2454  * Core functions
2455  */
2456 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2457 {
2458 	u32 *tile = rdev->config.si.tile_mode_array;
2459 	const u32 num_tile_mode_states =
2460 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2461 	u32 reg_offset, split_equal_to_row_size;
2462 
2463 	switch (rdev->config.si.mem_row_size_in_kb) {
2464 	case 1:
2465 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2466 		break;
2467 	case 2:
2468 	default:
2469 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2470 		break;
2471 	case 4:
2472 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2473 		break;
2474 	}
2475 
2476 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2477 		tile[reg_offset] = 0;
2478 
2479 	switch (rdev->family) {
2480 	case CHIP_TAHITI:
2481 	case CHIP_PITCAIRN:
2482 		/* non-AA compressed depth or any compressed stencil */
2483 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2485 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2486 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2487 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2488 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2490 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2491 		/* 2xAA/4xAA compressed depth only */
2492 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2494 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2495 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2496 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2497 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2499 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2500 		/* 8xAA compressed depth only */
2501 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2502 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2503 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2504 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2505 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2506 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2508 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2509 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2510 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2511 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2512 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2513 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2514 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2515 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2517 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2518 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2519 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2520 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2521 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2522 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2523 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2524 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2526 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2527 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2528 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2530 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2531 			   TILE_SPLIT(split_equal_to_row_size) |
2532 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2533 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2535 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2536 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2537 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2538 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2539 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2540 			   TILE_SPLIT(split_equal_to_row_size) |
2541 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2542 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2543 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2544 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2545 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2546 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2547 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2548 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2549 			   TILE_SPLIT(split_equal_to_row_size) |
2550 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2551 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2553 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2554 		/* 1D and 1D Array Surfaces */
2555 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2556 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2557 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2558 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2559 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2560 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2562 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2563 		/* Displayable maps. */
2564 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2565 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2566 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2567 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2568 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2569 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2571 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2572 		/* Display 8bpp. */
2573 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2574 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2575 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2576 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2577 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2578 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2580 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2581 		/* Display 16bpp. */
2582 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2584 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2585 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2586 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2587 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2589 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2590 		/* Display 32bpp. */
2591 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2592 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2593 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2594 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2595 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2596 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2598 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2599 		/* Thin. */
2600 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2601 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2602 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2603 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2604 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2605 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2606 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2607 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2608 		/* Thin 8 bpp. */
2609 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2611 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2612 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2613 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2614 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2615 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2616 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2617 		/* Thin 16 bpp. */
2618 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2620 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2621 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2622 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2623 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2625 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2626 		/* Thin 32 bpp. */
2627 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2628 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2629 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2630 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2631 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2632 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2635 		/* Thin 64 bpp. */
2636 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2637 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2638 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2639 			   TILE_SPLIT(split_equal_to_row_size) |
2640 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2641 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2642 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2643 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2644 		/* 8 bpp PRT. */
2645 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2646 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2647 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2648 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2649 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2650 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2651 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2652 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2653 		/* 16 bpp PRT */
2654 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2655 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2656 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2657 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2658 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2659 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2661 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2662 		/* 32 bpp PRT */
2663 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2664 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2665 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2666 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2667 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2668 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2670 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2671 		/* 64 bpp PRT */
2672 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2674 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2675 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2676 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2677 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2678 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2679 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2680 		/* 128 bpp PRT */
2681 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2683 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2684 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2685 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2686 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2687 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2688 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2689 
2690 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2691 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2692 		break;
2693 
2694 	case CHIP_VERDE:
2695 	case CHIP_OLAND:
2696 	case CHIP_HAINAN:
2697 		/* non-AA compressed depth or any compressed stencil */
2698 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2700 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2701 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2702 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2703 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2705 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2706 		/* 2xAA/4xAA compressed depth only */
2707 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2709 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2710 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2711 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2712 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2713 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2714 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2715 		/* 8xAA compressed depth only */
2716 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2717 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2718 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2719 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2720 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2721 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2723 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2724 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2725 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2726 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2727 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2728 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2729 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2730 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2732 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2733 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2734 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2735 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2736 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2737 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2738 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2739 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2741 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2742 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2743 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2744 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2745 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2746 			   TILE_SPLIT(split_equal_to_row_size) |
2747 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2748 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2749 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2750 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2751 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2752 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2754 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755 			   TILE_SPLIT(split_equal_to_row_size) |
2756 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2757 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2758 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2759 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2760 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2761 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2763 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2764 			   TILE_SPLIT(split_equal_to_row_size) |
2765 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2766 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2767 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2768 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2769 		/* 1D and 1D Array Surfaces */
2770 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2771 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2772 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2773 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2774 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2775 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2776 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2777 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2778 		/* Displayable maps. */
2779 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2780 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2781 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2783 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2784 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2785 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2786 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2787 		/* Display 8bpp. */
2788 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2790 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2792 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2793 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2794 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2795 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2796 		/* Display 16bpp. */
2797 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2799 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2800 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2801 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2802 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2803 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2804 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2805 		/* Display 32bpp. */
2806 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2807 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2808 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2809 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2810 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2811 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2812 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2813 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2814 		/* Thin. */
2815 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2816 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2817 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2818 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2819 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2820 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2822 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2823 		/* Thin 8 bpp. */
2824 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2825 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2826 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2827 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2828 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2829 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2831 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2832 		/* Thin 16 bpp. */
2833 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2835 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2836 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2837 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2838 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2840 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2841 		/* Thin 32 bpp. */
2842 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2843 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2844 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2845 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2846 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2847 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2848 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2849 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2850 		/* Thin 64 bpp. */
2851 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2852 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2853 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2854 			   TILE_SPLIT(split_equal_to_row_size) |
2855 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2856 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2859 		/* 8 bpp PRT. */
2860 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2861 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2862 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2863 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2864 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2865 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2866 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2867 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2868 		/* 16 bpp PRT */
2869 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2870 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2871 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2872 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2873 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2874 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2875 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2876 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2877 		/* 32 bpp PRT */
2878 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2879 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2880 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2881 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2882 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2883 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2884 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2885 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2886 		/* 64 bpp PRT */
2887 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2889 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2890 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2891 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2892 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2893 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2894 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2895 		/* 128 bpp PRT */
2896 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2898 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2899 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2900 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2901 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2902 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2903 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2904 
2905 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2906 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2907 		break;
2908 
2909 	default:
2910 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2911 	}
2912 }
2913 
2914 static void si_select_se_sh(struct radeon_device *rdev,
2915 			    u32 se_num, u32 sh_num)
2916 {
2917 	u32 data = INSTANCE_BROADCAST_WRITES;
2918 
2919 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2920 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2921 	else if (se_num == 0xffffffff)
2922 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2923 	else if (sh_num == 0xffffffff)
2924 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2925 	else
2926 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2927 	WREG32(GRBM_GFX_INDEX, data);
2928 }
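/* Passing 0xffffffff for se_num and/or sh_num selects broadcast writes;
 * callers restore full broadcast after per-SE/SH programming via
 * si_select_se_sh(rdev, 0xffffffff, 0xffffffff). */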
2929 
2930 static u32 si_create_bitmask(u32 bit_width)
2931 {
2932 	u32 i, mask = 0;
2933 
2934 	for (i = 0; i < bit_width; i++) {
2935 		mask <<= 1;
2936 		mask |= 1;
2937 	}
2938 	return mask;
2939 }
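/* e.g. si_create_bitmask(5) == 0x1f; the loop form also stays defined for
 * a full 32-bit width, where (1u << 32) - 1 would be undefined. */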
2940 
2941 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2942 {
2943 	u32 data, mask;
2944 
2945 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2946 	if (data & 1)
2947 		data &= INACTIVE_CUS_MASK;
2948 	else
2949 		data = 0;
2950 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2951 
2952 	data >>= INACTIVE_CUS_SHIFT;
2953 
2954 	mask = si_create_bitmask(cu_per_sh);
2955 
2956 	return ~data & mask;
2957 }
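/* e.g. with cu_per_sh = 5 and only CU 4 fused off (inactive mask 0x10)
 * this returns ~0x10 & 0x1f = 0x0f, i.e. CUs 0-3 are usable. */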
2958 
2959 static void si_setup_spi(struct radeon_device *rdev,
2960 			 u32 se_num, u32 sh_per_se,
2961 			 u32 cu_per_sh)
2962 {
2963 	int i, j, k;
2964 	u32 data, mask, active_cu;
2965 
2966 	for (i = 0; i < se_num; i++) {
2967 		for (j = 0; j < sh_per_se; j++) {
2968 			si_select_se_sh(rdev, i, j);
2969 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2970 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2971 
2972 			/* probe each CU bit in turn */
2973 			mask = 1;
2974 			for (k = 0; k < 16; k++, mask <<= 1) {
2975 				if (active_cu & mask) {
2976 					data &= ~mask;
2977 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2978 					break;
2979 				}
2980 			}
2981 		}
2982 	}
2983 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2984 }
2985 
2986 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2987 			      u32 max_rb_num_per_se,
2988 			      u32 sh_per_se)
2989 {
2990 	u32 data, mask;
2991 
2992 	data = RREG32(CC_RB_BACKEND_DISABLE);
2993 	if (data & 1)
2994 		data &= BACKEND_DISABLE_MASK;
2995 	else
2996 		data = 0;
2997 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2998 
2999 	data >>= BACKEND_DISABLE_SHIFT;
3000 
3001 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3002 
3003 	return data & mask;
3004 }
3005 
3006 static void si_setup_rb(struct radeon_device *rdev,
3007 			u32 se_num, u32 sh_per_se,
3008 			u32 max_rb_num_per_se)
3009 {
3010 	int i, j;
3011 	u32 data, mask;
3012 	u32 disabled_rbs = 0;
3013 	u32 enabled_rbs = 0;
3014 
3015 	for (i = 0; i < se_num; i++) {
3016 		for (j = 0; j < sh_per_se; j++) {
3017 			si_select_se_sh(rdev, i, j);
3018 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3019 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3020 		}
3021 	}
3022 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3023 
3024 	mask = 1;
3025 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3026 		if (!(disabled_rbs & mask))
3027 			enabled_rbs |= mask;
3028 		mask <<= 1;
3029 	}
3030 
3031 	rdev->config.si.backend_enable_mask = enabled_rbs;
3032 
3033 	for (i = 0; i < se_num; i++) {
3034 		si_select_se_sh(rdev, i, 0xffffffff);
3035 		data = 0;
3036 		for (j = 0; j < sh_per_se; j++) {
3037 			switch (enabled_rbs & 3) {
3038 			case 1:
3039 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3040 				break;
3041 			case 2:
3042 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3043 				break;
3044 			case 3:
3045 			default:
3046 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3047 				break;
3048 			}
3049 			enabled_rbs >>= 2;
3050 		}
3051 		WREG32(PA_SC_RASTER_CONFIG, data);
3052 	}
3053 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3054 }
3055 
3056 static void si_gpu_init(struct radeon_device *rdev)
3057 {
3058 	u32 gb_addr_config = 0;
3059 	u32 mc_shared_chmap, mc_arb_ramcfg;
3060 	u32 sx_debug_1;
3061 	u32 hdp_host_path_cntl;
3062 	u32 tmp;
3063 	int i, j;
3064 
3065 	switch (rdev->family) {
3066 	case CHIP_TAHITI:
3067 		rdev->config.si.max_shader_engines = 2;
3068 		rdev->config.si.max_tile_pipes = 12;
3069 		rdev->config.si.max_cu_per_sh = 8;
3070 		rdev->config.si.max_sh_per_se = 2;
3071 		rdev->config.si.max_backends_per_se = 4;
3072 		rdev->config.si.max_texture_channel_caches = 12;
3073 		rdev->config.si.max_gprs = 256;
3074 		rdev->config.si.max_gs_threads = 32;
3075 		rdev->config.si.max_hw_contexts = 8;
3076 
3077 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3078 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3079 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3080 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3081 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3082 		break;
3083 	case CHIP_PITCAIRN:
3084 		rdev->config.si.max_shader_engines = 2;
3085 		rdev->config.si.max_tile_pipes = 8;
3086 		rdev->config.si.max_cu_per_sh = 5;
3087 		rdev->config.si.max_sh_per_se = 2;
3088 		rdev->config.si.max_backends_per_se = 4;
3089 		rdev->config.si.max_texture_channel_caches = 8;
3090 		rdev->config.si.max_gprs = 256;
3091 		rdev->config.si.max_gs_threads = 32;
3092 		rdev->config.si.max_hw_contexts = 8;
3093 
3094 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3095 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3096 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3097 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3098 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3099 		break;
3100 	case CHIP_VERDE:
3101 	default:
3102 		rdev->config.si.max_shader_engines = 1;
3103 		rdev->config.si.max_tile_pipes = 4;
3104 		rdev->config.si.max_cu_per_sh = 5;
3105 		rdev->config.si.max_sh_per_se = 2;
3106 		rdev->config.si.max_backends_per_se = 4;
3107 		rdev->config.si.max_texture_channel_caches = 4;
3108 		rdev->config.si.max_gprs = 256;
3109 		rdev->config.si.max_gs_threads = 32;
3110 		rdev->config.si.max_hw_contexts = 8;
3111 
3112 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3113 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3114 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3115 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3116 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3117 		break;
3118 	case CHIP_OLAND:
3119 		rdev->config.si.max_shader_engines = 1;
3120 		rdev->config.si.max_tile_pipes = 4;
3121 		rdev->config.si.max_cu_per_sh = 6;
3122 		rdev->config.si.max_sh_per_se = 1;
3123 		rdev->config.si.max_backends_per_se = 2;
3124 		rdev->config.si.max_texture_channel_caches = 4;
3125 		rdev->config.si.max_gprs = 256;
3126 		rdev->config.si.max_gs_threads = 16;
3127 		rdev->config.si.max_hw_contexts = 8;
3128 
3129 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3130 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3131 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3132 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3133 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3134 		break;
3135 	case CHIP_HAINAN:
3136 		rdev->config.si.max_shader_engines = 1;
3137 		rdev->config.si.max_tile_pipes = 4;
3138 		rdev->config.si.max_cu_per_sh = 5;
3139 		rdev->config.si.max_sh_per_se = 1;
3140 		rdev->config.si.max_backends_per_se = 1;
3141 		rdev->config.si.max_texture_channel_caches = 2;
3142 		rdev->config.si.max_gprs = 256;
3143 		rdev->config.si.max_gs_threads = 16;
3144 		rdev->config.si.max_hw_contexts = 8;
3145 
3146 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3147 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3148 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3149 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3150 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3151 		break;
3152 	}
3153 
3154 	/* Initialize HDP */
3155 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3156 		WREG32((0x2c14 + j), 0x00000000);
3157 		WREG32((0x2c18 + j), 0x00000000);
3158 		WREG32((0x2c1c + j), 0x00000000);
3159 		WREG32((0x2c20 + j), 0x00000000);
3160 		WREG32((0x2c24 + j), 0x00000000);
3161 	}
3162 
3163 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3164 	WREG32(SRBM_INT_CNTL, 1);
3165 	WREG32(SRBM_INT_ACK, 1);
3166 
3167 	evergreen_fix_pci_max_read_req_size(rdev);
3168 
3169 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3170 
3171 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3172 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3173 
3174 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3175 	rdev->config.si.mem_max_burst_length_bytes = 256;
3176 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3177 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3178 	if (rdev->config.si.mem_row_size_in_kb > 4)
3179 		rdev->config.si.mem_row_size_in_kb = 4;
3180 	/* XXX use MC settings? */
3181 	rdev->config.si.shader_engine_tile_size = 32;
3182 	rdev->config.si.num_gpus = 1;
3183 	rdev->config.si.multi_gpu_tile_size = 64;
3184 
3185 	/* fix up row size */
3186 	gb_addr_config &= ~ROW_SIZE_MASK;
3187 	switch (rdev->config.si.mem_row_size_in_kb) {
3188 	case 1:
3189 	default:
3190 		gb_addr_config |= ROW_SIZE(0);
3191 		break;
3192 	case 2:
3193 		gb_addr_config |= ROW_SIZE(1);
3194 		break;
3195 	case 4:
3196 		gb_addr_config |= ROW_SIZE(2);
3197 		break;
3198 	}
3199 
3200 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3201 	 * not have bank info, so create a custom tiling dword.
3202 	 * bits 3:0   num_pipes
3203 	 * bits 7:4   num_banks
3204 	 * bits 11:8  group_size
3205 	 * bits 15:12 row_size
3206 	 */
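	/*
	 * Worked example (illustrative numbers, not a specific board):
	 * 8 pipes (0x3), 16 banks (0x2 in bits 7:4), a 256 byte group
	 * (0x0 in bits 11:8) and 2KB rows (0x1 in bits 15:12) encode
	 * as tile_config = 0x1023.
	 */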
3207 	rdev->config.si.tile_config = 0;
3208 	switch (rdev->config.si.num_tile_pipes) {
3209 	case 1:
3210 		rdev->config.si.tile_config |= (0 << 0);
3211 		break;
3212 	case 2:
3213 		rdev->config.si.tile_config |= (1 << 0);
3214 		break;
3215 	case 4:
3216 		rdev->config.si.tile_config |= (2 << 0);
3217 		break;
3218 	case 8:
3219 	default:
3220 		/* XXX what about 12? */
3221 		rdev->config.si.tile_config |= (3 << 0);
3222 		break;
3223 	}
3224 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3225 	case 0: /* four banks */
3226 		rdev->config.si.tile_config |= 0 << 4;
3227 		break;
3228 	case 1: /* eight banks */
3229 		rdev->config.si.tile_config |= 1 << 4;
3230 		break;
3231 	case 2: /* sixteen banks */
3232 	default:
3233 		rdev->config.si.tile_config |= 2 << 4;
3234 		break;
3235 	}
3236 	rdev->config.si.tile_config |=
3237 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3238 	rdev->config.si.tile_config |=
3239 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3240 
3241 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3242 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3243 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3244 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3245 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3246 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3247 	if (rdev->has_uvd) {
3248 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3249 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3250 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3251 	}
3252 
3253 	si_tiling_mode_table_init(rdev);
3254 
3255 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3256 		    rdev->config.si.max_sh_per_se,
3257 		    rdev->config.si.max_backends_per_se);
3258 
3259 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3260 		     rdev->config.si.max_sh_per_se,
3261 		     rdev->config.si.max_cu_per_sh);
3262 
3263 	rdev->config.si.active_cus = 0;
3264 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3265 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3266 			rdev->config.si.active_cus +=
3267 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3268 		}
3269 	}
3270 
3271 	/* set HW defaults for 3D engine */
3272 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3273 				     ROQ_IB2_START(0x2b)));
3274 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3275 
3276 	sx_debug_1 = RREG32(SX_DEBUG_1);
3277 	WREG32(SX_DEBUG_1, sx_debug_1);
3278 
3279 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3280 
3281 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3282 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3283 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3284 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3285 
3286 	WREG32(VGT_NUM_INSTANCES, 1);
3287 
3288 	WREG32(CP_PERFMON_CNTL, 0);
3289 
3290 	WREG32(SQ_CONFIG, 0);
3291 
3292 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3293 					  FORCE_EOV_MAX_REZ_CNT(255)));
3294 
3295 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3296 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3297 
3298 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3299 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3300 
3301 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3302 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3303 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3304 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3305 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3306 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3307 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3308 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3309 
3310 	tmp = RREG32(HDP_MISC_CNTL);
3311 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3312 	WREG32(HDP_MISC_CNTL, tmp);
3313 
3314 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3315 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3316 
3317 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3318 
3319 	udelay(50);
3320 }
3321 
3322 /*
3323  * GPU scratch register helper functions.
3324  */
3325 static void si_scratch_init(struct radeon_device *rdev)
3326 {
3327 	int i;
3328 
3329 	rdev->scratch.num_reg = 7;
3330 	rdev->scratch.reg_base = SCRATCH_REG0;
3331 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3332 		rdev->scratch.free[i] = true;
3333 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3334 	}
3335 }
3336 
3337 void si_fence_ring_emit(struct radeon_device *rdev,
3338 			struct radeon_fence *fence)
3339 {
3340 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3341 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3342 
3343 	/* flush read cache over gart */
3344 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3345 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3346 	radeon_ring_write(ring, 0);
3347 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3348 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3349 			  PACKET3_TC_ACTION_ENA |
3350 			  PACKET3_SH_KCACHE_ACTION_ENA |
3351 			  PACKET3_SH_ICACHE_ACTION_ENA);
3352 	radeon_ring_write(ring, 0xFFFFFFFF);
3353 	radeon_ring_write(ring, 0);
3354 	radeon_ring_write(ring, 10); /* poll interval */
3355 	/* EVENT_WRITE_EOP - flush caches, send int */
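	/* As used here (field meanings per the r600+ packet format, not
	 * re-verified against ASIC docs): DATA_SEL(1) has the CP write the
	 * 32-bit fence seq, INT_SEL(2) raises the interrupt once that
	 * write has been confirmed.
	 */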
3356 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3357 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3358 	radeon_ring_write(ring, lower_32_bits(addr));
3359 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3360 	radeon_ring_write(ring, fence->seq);
3361 	radeon_ring_write(ring, 0);
3362 }
3363 
3364 /*
3365  * IB (indirect buffer) execution.
3366  */
3367 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3368 {
3369 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3370 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3371 	u32 header;
3372 
3373 	if (ib->is_const_ib) {
3374 		/* set switch buffer packet before const IB */
3375 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3376 		radeon_ring_write(ring, 0);
3377 
3378 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3379 	} else {
3380 		u32 next_rptr;
3381 		if (ring->rptr_save_reg) {
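			/* 3 dwords for this SET_CONFIG_REG write, 4 for the
			 * INDIRECT_BUFFER packet and 8 for the cache flush
			 * emitted after it (accounting inferred from the
			 * packet writes below)
			 */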
3382 			next_rptr = ring->wptr + 3 + 4 + 8;
3383 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3384 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3385 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3386 			radeon_ring_write(ring, next_rptr);
3387 		} else if (rdev->wb.enabled) {
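			/* same accounting as above, but the WRITE_DATA
			 * packet here is 5 dwords rather than 3
			 */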
3388 			next_rptr = ring->wptr + 5 + 4 + 8;
3389 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
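			/* control word: dst sel 1 in bits 11:8 selects a
			 * memory destination, i.e. WRITE_DATA_DST_SEL(1)
			 */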
3390 			radeon_ring_write(ring, (1 << 8));
3391 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3392 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3393 			radeon_ring_write(ring, next_rptr);
3394 		}
3395 
3396 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3397 	}
3398 
3399 	radeon_ring_write(ring, header);
3400 	radeon_ring_write(ring,
3401 #ifdef __BIG_ENDIAN
3402 			  (2 << 0) |
3403 #endif
3404 			  (ib->gpu_addr & 0xFFFFFFFC));
3405 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3406 	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3407 
3408 	if (!ib->is_const_ib) {
3409 		/* flush read cache over gart for this vmid */
3410 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3411 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3412 		radeon_ring_write(ring, vm_id);
3413 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3414 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3415 				  PACKET3_TC_ACTION_ENA |
3416 				  PACKET3_SH_KCACHE_ACTION_ENA |
3417 				  PACKET3_SH_ICACHE_ACTION_ENA);
3418 		radeon_ring_write(ring, 0xFFFFFFFF);
3419 		radeon_ring_write(ring, 0);
3420 		radeon_ring_write(ring, 10); /* poll interval */
3421 	}
3422 }
3423 
3424 /*
3425  * CP.
3426  */
3427 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3428 {
3429 	if (enable) {
3430 		WREG32(CP_ME_CNTL, 0);
3431 	} else {
3432 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3433 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3434 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3435 		WREG32(SCRATCH_UMSK, 0);
3436 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3437 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3438 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3439 	}
3440 	udelay(50);
3441 }
3442 
3443 static int si_cp_load_microcode(struct radeon_device *rdev)
3444 {
3445 	int i;
3446 
3447 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3448 		return -EINVAL;
3449 
3450 	si_cp_enable(rdev, false);
3451 
3452 	if (rdev->new_fw) {
3453 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3454 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3455 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3456 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3457 		const struct gfx_firmware_header_v1_0 *me_hdr =
3458 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3459 		const __le32 *fw_data;
3460 		u32 fw_size;
3461 
3462 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3463 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3464 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3465 
3466 		/* PFP */
3467 		fw_data = (const __le32 *)
3468 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3469 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3470 		WREG32(CP_PFP_UCODE_ADDR, 0);
3471 		for (i = 0; i < fw_size; i++)
3472 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3473 		WREG32(CP_PFP_UCODE_ADDR, 0);
3474 
3475 		/* CE */
3476 		fw_data = (const __le32 *)
3477 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3478 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3479 		WREG32(CP_CE_UCODE_ADDR, 0);
3480 		for (i = 0; i < fw_size; i++)
3481 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3482 		WREG32(CP_CE_UCODE_ADDR, 0);
3483 
3484 		/* ME */
3485 		fw_data = (const __le32 *)
3486 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3487 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3488 		WREG32(CP_ME_RAM_WADDR, 0);
3489 		for (i = 0; i < fw_size; i++)
3490 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3491 		WREG32(CP_ME_RAM_WADDR, 0);
3492 	} else {
3493 		const __be32 *fw_data;
3494 
3495 		/* PFP */
3496 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3497 		WREG32(CP_PFP_UCODE_ADDR, 0);
3498 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3499 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3500 		WREG32(CP_PFP_UCODE_ADDR, 0);
3501 
3502 		/* CE */
3503 		fw_data = (const __be32 *)rdev->ce_fw->data;
3504 		WREG32(CP_CE_UCODE_ADDR, 0);
3505 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3506 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3507 		WREG32(CP_CE_UCODE_ADDR, 0);
3508 
3509 		/* ME */
3510 		fw_data = (const __be32 *)rdev->me_fw->data;
3511 		WREG32(CP_ME_RAM_WADDR, 0);
3512 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3513 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3514 		WREG32(CP_ME_RAM_WADDR, 0);
3515 	}
3516 
3517 	WREG32(CP_PFP_UCODE_ADDR, 0);
3518 	WREG32(CP_CE_UCODE_ADDR, 0);
3519 	WREG32(CP_ME_RAM_WADDR, 0);
3520 	WREG32(CP_ME_RAM_RADDR, 0);
3521 	return 0;
3522 }
3523 
3524 static int si_cp_start(struct radeon_device *rdev)
3525 {
3526 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3527 	int r, i;
3528 
3529 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3530 	if (r) {
3531 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3532 		return r;
3533 	}
3534 	/* init the CP */
3535 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3536 	radeon_ring_write(ring, 0x1);
3537 	radeon_ring_write(ring, 0x0);
3538 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3539 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3540 	radeon_ring_write(ring, 0);
3541 	radeon_ring_write(ring, 0);
3542 
3543 	/* init the CE partitions */
3544 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3545 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3546 	radeon_ring_write(ring, 0xc000);
3547 	radeon_ring_write(ring, 0xe000);
3548 	radeon_ring_unlock_commit(rdev, ring, false);
3549 
3550 	si_cp_enable(rdev, true);
3551 
3552 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3553 	if (r) {
3554 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3555 		return r;
3556 	}
3557 
3558 	/* setup clear context state */
3559 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3560 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3561 
3562 	for (i = 0; i < si_default_size; i++)
3563 		radeon_ring_write(ring, si_default_state[i]);
3564 
3565 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3566 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3567 
3568 	/* set clear context state */
3569 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3570 	radeon_ring_write(ring, 0);
3571 
3572 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3573 	radeon_ring_write(ring, 0x00000316);
3574 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3575 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3576 
3577 	radeon_ring_unlock_commit(rdev, ring, false);
3578 
3579 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3580 		ring = &rdev->ring[i];
3581 		r = radeon_ring_lock(rdev, ring, 2);
		if (r) {
			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
			return r;
		}
3582 
3583 		/* clear the compute context state */
3584 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3585 		radeon_ring_write(ring, 0);
3586 
3587 		radeon_ring_unlock_commit(rdev, ring, false);
3588 	}
3589 
3590 	return 0;
3591 }
3592 
3593 static void si_cp_fini(struct radeon_device *rdev)
3594 {
3595 	struct radeon_ring *ring;
3596 	si_cp_enable(rdev, false);
3597 
3598 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3599 	radeon_ring_fini(rdev, ring);
3600 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3601 
3602 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3603 	radeon_ring_fini(rdev, ring);
3604 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3605 
3606 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3607 	radeon_ring_fini(rdev, ring);
3608 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3609 }
3610 
3611 static int si_cp_resume(struct radeon_device *rdev)
3612 {
3613 	struct radeon_ring *ring;
3614 	u32 tmp;
3615 	u32 rb_bufsz;
3616 	int r;
3617 
3618 	si_enable_gui_idle_interrupt(rdev, false);
3619 
3620 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3621 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3622 
3623 	/* Set the write pointer delay */
3624 	WREG32(CP_RB_WPTR_DELAY, 0);
3625 
3626 	WREG32(CP_DEBUG, 0);
3627 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3628 
3629 	/* ring 0 - compute and gfx */
3630 	/* Set ring buffer size */
3631 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
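	/* CP_RB0_CNTL takes log2 sizes (field usage as written here, not
	 * re-checked against ASIC docs): bits 5:0 hold the ring size in
	 * 8 byte units, bits 13:8 the rptr report block size; one 4K page
	 * is order_base_2(4096/8) = 9.
	 */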
3632 	rb_bufsz = order_base_2(ring->ring_size / 8);
3633 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3634 #ifdef __BIG_ENDIAN
3635 	tmp |= BUF_SWAP_32BIT;
3636 #endif
3637 	WREG32(CP_RB0_CNTL, tmp);
3638 
3639 	/* Initialize the ring buffer's read and write pointers */
3640 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3641 	ring->wptr = 0;
3642 	WREG32(CP_RB0_WPTR, ring->wptr);
3643 
3644 	/* set the wb address whether it's enabled or not */
3645 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3646 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3647 
3648 	if (rdev->wb.enabled) {
3649 		WREG32(SCRATCH_UMSK, 0xff);
3650 	} else {
3651 		tmp |= RB_NO_UPDATE;
3652 		WREG32(SCRATCH_UMSK, 0);
3653 	}
3654 
3655 	mdelay(1);
3656 	WREG32(CP_RB0_CNTL, tmp);
3657 
3658 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3659 
3660 	/* ring1  - compute only */
3661 	/* Set ring buffer size */
3662 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3663 	rb_bufsz = order_base_2(ring->ring_size / 8);
3664 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3665 #ifdef __BIG_ENDIAN
3666 	tmp |= BUF_SWAP_32BIT;
3667 #endif
3668 	WREG32(CP_RB1_CNTL, tmp);
3669 
3670 	/* Initialize the ring buffer's read and write pointers */
3671 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3672 	ring->wptr = 0;
3673 	WREG32(CP_RB1_WPTR, ring->wptr);
3674 
3675 	/* set the wb address whether it's enabled or not */
3676 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3677 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3678 
3679 	mdelay(1);
3680 	WREG32(CP_RB1_CNTL, tmp);
3681 
3682 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3683 
3684 	/* ring2 - compute only */
3685 	/* Set ring buffer size */
3686 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3687 	rb_bufsz = order_base_2(ring->ring_size / 8);
3688 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3689 #ifdef __BIG_ENDIAN
3690 	tmp |= BUF_SWAP_32BIT;
3691 #endif
3692 	WREG32(CP_RB2_CNTL, tmp);
3693 
3694 	/* Initialize the ring buffer's read and write pointers */
3695 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3696 	ring->wptr = 0;
3697 	WREG32(CP_RB2_WPTR, ring->wptr);
3698 
3699 	/* set the wb address whether it's enabled or not */
3700 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3701 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3702 
3703 	mdelay(1);
3704 	WREG32(CP_RB2_CNTL, tmp);
3705 
3706 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3707 
3708 	/* start the rings */
3709 	si_cp_start(rdev);
3710 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3711 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3712 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3713 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3714 	if (r) {
3715 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3716 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3717 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3718 		return r;
3719 	}
3720 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3721 	if (r) {
3722 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3723 	}
3724 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3725 	if (r) {
3726 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3727 	}
3728 
3729 	si_enable_gui_idle_interrupt(rdev, true);
3730 
3731 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3732 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3733 
3734 	return 0;
3735 }
3736 
3737 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3738 {
3739 	u32 reset_mask = 0;
3740 	u32 tmp;
3741 
3742 	/* GRBM_STATUS */
3743 	tmp = RREG32(GRBM_STATUS);
3744 	if (tmp & (PA_BUSY | SC_BUSY |
3745 		   BCI_BUSY | SX_BUSY |
3746 		   TA_BUSY | VGT_BUSY |
3747 		   DB_BUSY | CB_BUSY |
3748 		   GDS_BUSY | SPI_BUSY |
3749 		   IA_BUSY | IA_BUSY_NO_DMA))
3750 		reset_mask |= RADEON_RESET_GFX;
3751 
3752 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3753 		   CP_BUSY | CP_COHERENCY_BUSY))
3754 		reset_mask |= RADEON_RESET_CP;
3755 
3756 	if (tmp & GRBM_EE_BUSY)
3757 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3758 
3759 	/* GRBM_STATUS2 */
3760 	tmp = RREG32(GRBM_STATUS2);
3761 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3762 		reset_mask |= RADEON_RESET_RLC;
3763 
3764 	/* DMA_STATUS_REG 0 */
3765 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3766 	if (!(tmp & DMA_IDLE))
3767 		reset_mask |= RADEON_RESET_DMA;
3768 
3769 	/* DMA_STATUS_REG 1 */
3770 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3771 	if (!(tmp & DMA_IDLE))
3772 		reset_mask |= RADEON_RESET_DMA1;
3773 
3774 	/* SRBM_STATUS2 */
3775 	tmp = RREG32(SRBM_STATUS2);
3776 	if (tmp & DMA_BUSY)
3777 		reset_mask |= RADEON_RESET_DMA;
3778 
3779 	if (tmp & DMA1_BUSY)
3780 		reset_mask |= RADEON_RESET_DMA1;
3781 
3782 	/* SRBM_STATUS */
3783 	tmp = RREG32(SRBM_STATUS);
3784 
3785 	if (tmp & IH_BUSY)
3786 		reset_mask |= RADEON_RESET_IH;
3787 
3788 	if (tmp & SEM_BUSY)
3789 		reset_mask |= RADEON_RESET_SEM;
3790 
3791 	if (tmp & GRBM_RQ_PENDING)
3792 		reset_mask |= RADEON_RESET_GRBM;
3793 
3794 	if (tmp & VMC_BUSY)
3795 		reset_mask |= RADEON_RESET_VMC;
3796 
3797 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3798 		   MCC_BUSY | MCD_BUSY))
3799 		reset_mask |= RADEON_RESET_MC;
3800 
3801 	if (evergreen_is_display_hung(rdev))
3802 		reset_mask |= RADEON_RESET_DISPLAY;
3803 
3804 	/* VM_L2_STATUS */
3805 	tmp = RREG32(VM_L2_STATUS);
3806 	if (tmp & L2_BUSY)
3807 		reset_mask |= RADEON_RESET_VMC;
3808 
3809 	/* Skip MC reset as it's most likely not hung, just busy */
3810 	if (reset_mask & RADEON_RESET_MC) {
3811 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3812 		reset_mask &= ~RADEON_RESET_MC;
3813 	}
3814 
3815 	return reset_mask;
3816 }
3817 
3818 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3819 {
3820 	struct evergreen_mc_save save;
3821 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3822 	u32 tmp;
3823 
3824 	if (reset_mask == 0)
3825 		return;
3826 
3827 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3828 
3829 	evergreen_print_gpu_status_regs(rdev);
3830 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3831 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3832 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3833 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3834 
3835 	/* disable PG/CG */
3836 	si_fini_pg(rdev);
3837 	si_fini_cg(rdev);
3838 
3839 	/* stop the rlc */
3840 	si_rlc_stop(rdev);
3841 
3842 	/* Disable CP parsing/prefetching */
3843 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3844 
3845 	if (reset_mask & RADEON_RESET_DMA) {
3846 		/* dma0 */
3847 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3848 		tmp &= ~DMA_RB_ENABLE;
3849 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3850 	}
3851 	if (reset_mask & RADEON_RESET_DMA1) {
3852 		/* dma1 */
3853 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3854 		tmp &= ~DMA_RB_ENABLE;
3855 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3856 	}
3857 
3858 	udelay(50);
3859 
3860 	evergreen_mc_stop(rdev, &save);
3861 	if (evergreen_mc_wait_for_idle(rdev)) {
3862 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3863 	}
3864 
3865 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3866 		grbm_soft_reset = SOFT_RESET_CB |
3867 			SOFT_RESET_DB |
3868 			SOFT_RESET_GDS |
3869 			SOFT_RESET_PA |
3870 			SOFT_RESET_SC |
3871 			SOFT_RESET_BCI |
3872 			SOFT_RESET_SPI |
3873 			SOFT_RESET_SX |
3874 			SOFT_RESET_TC |
3875 			SOFT_RESET_TA |
3876 			SOFT_RESET_VGT |
3877 			SOFT_RESET_IA;
3878 	}
3879 
3880 	if (reset_mask & RADEON_RESET_CP) {
3881 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3882 
3883 		srbm_soft_reset |= SOFT_RESET_GRBM;
3884 	}
3885 
3886 	if (reset_mask & RADEON_RESET_DMA)
3887 		srbm_soft_reset |= SOFT_RESET_DMA;
3888 
3889 	if (reset_mask & RADEON_RESET_DMA1)
3890 		srbm_soft_reset |= SOFT_RESET_DMA1;
3891 
3892 	if (reset_mask & RADEON_RESET_DISPLAY)
3893 		srbm_soft_reset |= SOFT_RESET_DC;
3894 
3895 	if (reset_mask & RADEON_RESET_RLC)
3896 		grbm_soft_reset |= SOFT_RESET_RLC;
3897 
3898 	if (reset_mask & RADEON_RESET_SEM)
3899 		srbm_soft_reset |= SOFT_RESET_SEM;
3900 
3901 	if (reset_mask & RADEON_RESET_IH)
3902 		srbm_soft_reset |= SOFT_RESET_IH;
3903 
3904 	if (reset_mask & RADEON_RESET_GRBM)
3905 		srbm_soft_reset |= SOFT_RESET_GRBM;
3906 
3907 	if (reset_mask & RADEON_RESET_VMC)
3908 		srbm_soft_reset |= SOFT_RESET_VMC;
3909 
3910 	if (reset_mask & RADEON_RESET_MC)
3911 		srbm_soft_reset |= SOFT_RESET_MC;
3912 
3913 	if (grbm_soft_reset) {
3914 		tmp = RREG32(GRBM_SOFT_RESET);
3915 		tmp |= grbm_soft_reset;
3916 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3917 		WREG32(GRBM_SOFT_RESET, tmp);
3918 		tmp = RREG32(GRBM_SOFT_RESET);
3919 
3920 		udelay(50);
3921 
3922 		tmp &= ~grbm_soft_reset;
3923 		WREG32(GRBM_SOFT_RESET, tmp);
3924 		tmp = RREG32(GRBM_SOFT_RESET);
3925 	}
3926 
3927 	if (srbm_soft_reset) {
3928 		tmp = RREG32(SRBM_SOFT_RESET);
3929 		tmp |= srbm_soft_reset;
3930 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3931 		WREG32(SRBM_SOFT_RESET, tmp);
3932 		tmp = RREG32(SRBM_SOFT_RESET);
3933 
3934 		udelay(50);
3935 
3936 		tmp &= ~srbm_soft_reset;
3937 		WREG32(SRBM_SOFT_RESET, tmp);
3938 		tmp = RREG32(SRBM_SOFT_RESET);
3939 	}
3940 
3941 	/* Wait a little for things to settle down */
3942 	udelay(50);
3943 
3944 	evergreen_mc_resume(rdev, &save);
3945 	udelay(50);
3946 
3947 	evergreen_print_gpu_status_regs(rdev);
3948 }
3949 
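/* Switch SCLK/MCLK over to their bypass sources so the chip keeps a
 * stable clock while the PLLs are shut down for the PCI config reset
 * (purpose inferred from the caller, si_gpu_pci_config_reset()).
 */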
3950 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3951 {
3952 	u32 tmp, i;
3953 
3954 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3955 	tmp |= SPLL_BYPASS_EN;
3956 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3957 
3958 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3959 	tmp |= SPLL_CTLREQ_CHG;
3960 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3961 
3962 	for (i = 0; i < rdev->usec_timeout; i++) {
3963 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3964 			break;
3965 		udelay(1);
3966 	}
3967 
3968 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3969 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3970 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3971 
3972 	tmp = RREG32(MPLL_CNTL_MODE);
3973 	tmp &= ~MPLL_MCLK_SEL;
3974 	WREG32(MPLL_CNTL_MODE, tmp);
3975 }
3976 
3977 static void si_spll_powerdown(struct radeon_device *rdev)
3978 {
3979 	u32 tmp;
3980 
3981 	tmp = RREG32(SPLL_CNTL_MODE);
3982 	tmp |= SPLL_SW_DIR_CONTROL;
3983 	WREG32(SPLL_CNTL_MODE, tmp);
3984 
3985 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3986 	tmp |= SPLL_RESET;
3987 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3988 
3989 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3990 	tmp |= SPLL_SLEEP;
3991 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3992 
3993 	tmp = RREG32(SPLL_CNTL_MODE);
3994 	tmp &= ~SPLL_SW_DIR_CONTROL;
3995 	WREG32(SPLL_CNTL_MODE, tmp);
3996 }
3997 
3998 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
3999 {
4000 	struct evergreen_mc_save save;
4001 	u32 tmp, i;
4002 
4003 	dev_info(rdev->dev, "GPU pci config reset\n");
4004 
4005 	/* disable dpm? */
4006 
4007 	/* disable cg/pg */
4008 	si_fini_pg(rdev);
4009 	si_fini_cg(rdev);
4010 
4011 	/* Disable CP parsing/prefetching */
4012 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4013 	/* dma0 */
4014 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4015 	tmp &= ~DMA_RB_ENABLE;
4016 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4017 	/* dma1 */
4018 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4019 	tmp &= ~DMA_RB_ENABLE;
4020 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4021 	/* XXX other engines? */
4022 
4023 	/* halt the rlc, disable cp internal ints */
4024 	si_rlc_stop(rdev);
4025 
4026 	udelay(50);
4027 
4028 	/* disable mem access */
4029 	evergreen_mc_stop(rdev, &save);
4030 	if (evergreen_mc_wait_for_idle(rdev)) {
4031 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4032 	}
4033 
4034 	/* set mclk/sclk to bypass */
4035 	si_set_clk_bypass_mode(rdev);
4036 	/* powerdown spll */
4037 	si_spll_powerdown(rdev);
4038 	/* disable BM */
4039 	pci_clear_master(rdev->pdev);
4040 	/* reset */
4041 	radeon_pci_config_reset(rdev);
4042 	/* wait for asic to come out of reset */
4043 	for (i = 0; i < rdev->usec_timeout; i++) {
4044 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4045 			break;
4046 		udelay(1);
4047 	}
4048 }
4049 
4050 int si_asic_reset(struct radeon_device *rdev, bool hard)
4051 {
4052 	u32 reset_mask;
4053 
4054 	if (hard) {
4055 		si_gpu_pci_config_reset(rdev);
4056 		return 0;
4057 	}
4058 
4059 	reset_mask = si_gpu_check_soft_reset(rdev);
4060 
4061 	if (reset_mask)
4062 		r600_set_bios_scratch_engine_hung(rdev, true);
4063 
4064 	/* try soft reset */
4065 	si_gpu_soft_reset(rdev, reset_mask);
4066 
4067 	reset_mask = si_gpu_check_soft_reset(rdev);
4068 
4069 	/* try pci config reset */
4070 	if (reset_mask && radeon_hard_reset)
4071 		si_gpu_pci_config_reset(rdev);
4072 
4073 	reset_mask = si_gpu_check_soft_reset(rdev);
4074 
4075 	if (!reset_mask)
4076 		r600_set_bios_scratch_engine_hung(rdev, false);
4077 
4078 	return 0;
4079 }
4080 
4081 /**
4082  * si_gfx_is_lockup - Check if the GFX engine is locked up
4083  *
4084  * @rdev: radeon_device pointer
4085  * @ring: radeon_ring structure holding ring information
4086  *
4087  * Check if the GFX engine is locked up.
4088  * Returns true if the engine appears to be locked up, false if not.
4089  */
4090 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4091 {
4092 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4093 
4094 	if (!(reset_mask & (RADEON_RESET_GFX |
4095 			    RADEON_RESET_COMPUTE |
4096 			    RADEON_RESET_CP))) {
4097 		radeon_ring_lockup_update(rdev, ring);
4098 		return false;
4099 	}
4100 	return radeon_ring_test_lockup(rdev, ring);
4101 }
4102 
4103 /* MC */
4104 static void si_mc_program(struct radeon_device *rdev)
4105 {
4106 	struct evergreen_mc_save save;
4107 	u32 tmp;
4108 	int i, j;
4109 
4110 	/* Initialize HDP */
4111 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4112 		WREG32((0x2c14 + j), 0x00000000);
4113 		WREG32((0x2c18 + j), 0x00000000);
4114 		WREG32((0x2c1c + j), 0x00000000);
4115 		WREG32((0x2c20 + j), 0x00000000);
4116 		WREG32((0x2c24 + j), 0x00000000);
4117 	}
4118 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4119 
4120 	evergreen_mc_stop(rdev, &save);
4121 	if (radeon_mc_wait_for_idle(rdev)) {
4122 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4123 	}
4124 	if (!ASIC_IS_NODCE(rdev))
4125 		/* Lock out access through the VGA aperture */
4126 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4127 	/* Update configuration */
4128 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4129 	       rdev->mc.vram_start >> 12);
4130 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4131 	       rdev->mc.vram_end >> 12);
4132 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4133 	       rdev->vram_scratch.gpu_addr >> 12);
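	/* MC_VM_FB_LOCATION packs the FB range in 16MB units (hence the
	 * >> 24 below): top of range in bits 31:16, base in bits 15:0.
	 */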
4134 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4135 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4136 	WREG32(MC_VM_FB_LOCATION, tmp);
4137 	/* XXX double check these! */
4138 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4139 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4140 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4141 	WREG32(MC_VM_AGP_BASE, 0);
4142 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4143 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4144 	if (radeon_mc_wait_for_idle(rdev)) {
4145 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4146 	}
4147 	evergreen_mc_resume(rdev, &save);
4148 	if (!ASIC_IS_NODCE(rdev)) {
4149 		/* we need to own VRAM, so turn off the VGA renderer here
4150 		 * to stop it from overwriting our objects */
4151 		rv515_vga_render_disable(rdev);
4152 	}
4153 }
4154 
4155 void si_vram_gtt_location(struct radeon_device *rdev,
4156 			  struct radeon_mc *mc)
4157 {
4158 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4159 		/* leave room for at least 1024M GTT */
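		/* 0xFFC0000000 is 1TB - 1GB: the 40-bit MC address space
		 * less 1GB kept free for the GTT aperture.
		 */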
4160 		dev_warn(rdev->dev, "limiting VRAM\n");
4161 		mc->real_vram_size = 0xFFC0000000ULL;
4162 		mc->mc_vram_size = 0xFFC0000000ULL;
4163 	}
4164 	radeon_vram_location(rdev, &rdev->mc, 0);
4165 	rdev->mc.gtt_base_align = 0;
4166 	radeon_gtt_location(rdev, mc);
4167 }
4168 
4169 static int si_mc_init(struct radeon_device *rdev)
4170 {
4171 	u32 tmp;
4172 	int chansize, numchan;
4173 
4174 	/* Get VRAM information */
4175 	rdev->mc.vram_is_ddr = true;
4176 	tmp = RREG32(MC_ARB_RAMCFG);
4177 	if (tmp & CHANSIZE_OVERRIDE) {
4178 		chansize = 16;
4179 	} else if (tmp & CHANSIZE_MASK) {
4180 		chansize = 64;
4181 	} else {
4182 		chansize = 32;
4183 	}
4184 	tmp = RREG32(MC_SHARED_CHMAP);
4185 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4186 	case 0:
4187 	default:
4188 		numchan = 1;
4189 		break;
4190 	case 1:
4191 		numchan = 2;
4192 		break;
4193 	case 2:
4194 		numchan = 4;
4195 		break;
4196 	case 3:
4197 		numchan = 8;
4198 		break;
4199 	case 4:
4200 		numchan = 3;
4201 		break;
4202 	case 5:
4203 		numchan = 6;
4204 		break;
4205 	case 6:
4206 		numchan = 10;
4207 		break;
4208 	case 7:
4209 		numchan = 12;
4210 		break;
4211 	case 8:
4212 		numchan = 16;
4213 		break;
4214 	}
4215 	rdev->mc.vram_width = numchan * chansize;
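	/* illustrative arithmetic: NOOFCHAN = 7 decodes to 12 channels
	 * above, so 32-bit channels would give a 384-bit interface
	 */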
4216 	/* Could the aperture size report 0? */
4217 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4218 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4219 	/* size in MB on si */
4220 	tmp = RREG32(CONFIG_MEMSIZE);
4221 	/* some boards may have garbage in the upper 16 bits */
4222 	if (tmp & 0xffff0000) {
4223 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4224 		if (tmp & 0xffff)
4225 			tmp &= 0xffff;
4226 	}
4227 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4228 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4229 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4230 	si_vram_gtt_location(rdev, &rdev->mc);
4231 	radeon_update_bandwidth_info(rdev);
4232 
4233 	return 0;
4234 }
4235 
4236 /*
4237  * GART
4238  */
4239 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4240 {
4241 	/* flush hdp cache */
4242 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4243 
4244 	/* bits 0-15 are the VM contexts0-15 */
4245 	WREG32(VM_INVALIDATE_REQUEST, 1);
4246 }
4247 
4248 static int si_pcie_gart_enable(struct radeon_device *rdev)
4249 {
4250 	int r, i;
4251 
4252 	if (rdev->gart.robj == NULL) {
4253 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4254 		return -EINVAL;
4255 	}
4256 	r = radeon_gart_table_vram_pin(rdev);
4257 	if (r)
4258 		return r;
4259 	/* Setup TLB control */
4260 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4261 	       (0xA << 7) |
4262 	       ENABLE_L1_TLB |
4263 	       ENABLE_L1_FRAGMENT_PROCESSING |
4264 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4265 	       ENABLE_ADVANCED_DRIVER_MODEL |
4266 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4267 	/* Setup L2 cache */
4268 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4269 	       ENABLE_L2_FRAGMENT_PROCESSING |
4270 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4271 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4272 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4273 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4274 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4275 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4276 	       BANK_SELECT(4) |
4277 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4278 	/* setup context0 */
4279 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4280 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4281 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4282 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4283 			(u32)(rdev->dummy_page.addr >> 12));
4284 	WREG32(VM_CONTEXT0_CNTL2, 0);
4285 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4286 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4287 
4288 	WREG32(0x15D4, 0);
4289 	WREG32(0x15D8, 0);
4290 	WREG32(0x15DC, 0);
4291 
4292 	/* empty context1-15 */
4293 	/* set vm size, must be a multiple of 4 */
4294 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4295 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
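	/* START/END are in 4K page units (note the >> 12 shifts used for
	 * context0 above); contexts 1-15 span the whole VM space
	 */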
4296 	/* Assign the pt base to something valid for now; the pts used for
4297 	 * the VMs are determined by the application and set up and assigned
4298 	 * on the fly in the vm part of radeon_gart.c
4299 	 */
4300 	for (i = 1; i < 16; i++) {
4301 		if (i < 8)
4302 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4303 			       rdev->vm_manager.saved_table_addr[i]);
4304 		else
4305 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4306 			       rdev->vm_manager.saved_table_addr[i]);
4307 	}
4308 
4309 	/* enable context1-15 */
4310 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4311 	       (u32)(rdev->dummy_page.addr >> 12));
4312 	WREG32(VM_CONTEXT1_CNTL2, 4);
4313 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4314 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4315 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4316 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4317 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4318 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4319 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4320 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4321 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4322 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4323 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4324 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4325 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4326 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4327 
4328 	si_pcie_gart_tlb_flush(rdev);
4329 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4330 		 (unsigned)(rdev->mc.gtt_size >> 20),
4331 		 (unsigned long long)rdev->gart.table_addr);
4332 	rdev->gart.ready = true;
4333 	return 0;
4334 }
4335 
4336 static void si_pcie_gart_disable(struct radeon_device *rdev)
4337 {
4338 	unsigned i;
4339 
4340 	for (i = 1; i < 16; ++i) {
4341 		uint32_t reg;
4342 		if (i < 8)
4343 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4344 		else
4345 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4346 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4347 	}
4348 
4349 	/* Disable all tables */
4350 	WREG32(VM_CONTEXT0_CNTL, 0);
4351 	WREG32(VM_CONTEXT1_CNTL, 0);
4352 	/* Setup TLB control */
4353 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4354 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4355 	/* Setup L2 cache */
4356 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4357 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4358 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4359 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4360 	WREG32(VM_L2_CNTL2, 0);
4361 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4362 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4363 	radeon_gart_table_vram_unpin(rdev);
4364 }
4365 
4366 static void si_pcie_gart_fini(struct radeon_device *rdev)
4367 {
4368 	si_pcie_gart_disable(rdev);
4369 	radeon_gart_table_vram_free(rdev);
4370 	radeon_gart_fini(rdev);
4371 }
4372 
4373 /* vm parser */
4374 static bool si_vm_reg_valid(u32 reg)
4375 {
4376 	/* context regs are fine */
4377 	if (reg >= 0x28000)
4378 		return true;
4379 
4380 	/* shader regs are also fine */
4381 	if (reg >= 0xB000 && reg < 0xC000)
4382 		return true;
4383 
4384 	/* check config regs */
4385 	switch (reg) {
4386 	case GRBM_GFX_INDEX:
4387 	case CP_STRMOUT_CNTL:
4388 	case VGT_VTX_VECT_EJECT_REG:
4389 	case VGT_CACHE_INVALIDATION:
4390 	case VGT_ESGS_RING_SIZE:
4391 	case VGT_GSVS_RING_SIZE:
4392 	case VGT_GS_VERTEX_REUSE:
4393 	case VGT_PRIMITIVE_TYPE:
4394 	case VGT_INDEX_TYPE:
4395 	case VGT_NUM_INDICES:
4396 	case VGT_NUM_INSTANCES:
4397 	case VGT_TF_RING_SIZE:
4398 	case VGT_HS_OFFCHIP_PARAM:
4399 	case VGT_TF_MEMORY_BASE:
4400 	case PA_CL_ENHANCE:
4401 	case PA_SU_LINE_STIPPLE_VALUE:
4402 	case PA_SC_LINE_STIPPLE_STATE:
4403 	case PA_SC_ENHANCE:
4404 	case SQC_CACHES:
4405 	case SPI_STATIC_THREAD_MGMT_1:
4406 	case SPI_STATIC_THREAD_MGMT_2:
4407 	case SPI_STATIC_THREAD_MGMT_3:
4408 	case SPI_PS_MAX_WAVE_ID:
4409 	case SPI_CONFIG_CNTL:
4410 	case SPI_CONFIG_CNTL_1:
4411 	case TA_CNTL_AUX:
4412 		return true;
4413 	default:
4414 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4415 		return false;
4416 	}
4417 }
4418 
4419 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4420 				  u32 *ib, struct radeon_cs_packet *pkt)
4421 {
4422 	switch (pkt->opcode) {
4423 	case PACKET3_NOP:
4424 	case PACKET3_SET_BASE:
4425 	case PACKET3_SET_CE_DE_COUNTERS:
4426 	case PACKET3_LOAD_CONST_RAM:
4427 	case PACKET3_WRITE_CONST_RAM:
4428 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4429 	case PACKET3_DUMP_CONST_RAM:
4430 	case PACKET3_INCREMENT_CE_COUNTER:
4431 	case PACKET3_WAIT_ON_DE_COUNTER:
4432 	case PACKET3_CE_WRITE:
4433 		break;
4434 	default:
4435 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4436 		return -EINVAL;
4437 	}
4438 	return 0;
4439 }
4440 
4441 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4442 {
4443 	u32 start_reg, reg, i;
4444 	u32 command = ib[idx + 4];
4445 	u32 info = ib[idx + 1];
4446 	u32 idx_value = ib[idx];
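	/* CP_DMA layout as decoded below (inferred from these checks,
	 * not from packet docs): ib[idx] = src addr/data, ib[idx+1] =
	 * src/dst address-space selects, ib[idx+2] = dst addr and
	 * ib[idx+4] = command word with the count in bits 20:0
	 */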
4447 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4448 		/* src address space is register */
4449 		if (((info & 0x60000000) >> 29) == 0) {
4450 			start_reg = idx_value << 2;
4451 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4452 				reg = start_reg;
4453 				if (!si_vm_reg_valid(reg)) {
4454 					DRM_ERROR("CP DMA Bad SRC register\n");
4455 					return -EINVAL;
4456 				}
4457 			} else {
4458 				for (i = 0; i < (command & 0x1fffff); i++) {
4459 					reg = start_reg + (4 * i);
4460 					if (!si_vm_reg_valid(reg)) {
4461 						DRM_ERROR("CP DMA Bad SRC register\n");
4462 						return -EINVAL;
4463 					}
4464 				}
4465 			}
4466 		}
4467 	}
4468 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4469 		/* dst address space is register */
4470 		if (((info & 0x00300000) >> 20) == 0) {
4471 			start_reg = ib[idx + 2];
4472 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4473 				reg = start_reg;
4474 				if (!si_vm_reg_valid(reg)) {
4475 					DRM_ERROR("CP DMA Bad DST register\n");
4476 					return -EINVAL;
4477 				}
4478 			} else {
4479 				for (i = 0; i < (command & 0x1fffff); i++) {
4480 					reg = start_reg + (4 * i);
4481 					if (!si_vm_reg_valid(reg)) {
4482 						DRM_ERROR("CP DMA Bad DST register\n");
4483 						return -EINVAL;
4484 					}
4485 				}
4486 			}
4487 		}
4488 	}
4489 	return 0;
4490 }
4491 
4492 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4493 				   u32 *ib, struct radeon_cs_packet *pkt)
4494 {
4495 	int r;
4496 	u32 idx = pkt->idx + 1;
4497 	u32 idx_value = ib[idx];
4498 	u32 start_reg, end_reg, reg, i;
4499 
4500 	switch (pkt->opcode) {
4501 	case PACKET3_NOP:
4502 	case PACKET3_SET_BASE:
4503 	case PACKET3_CLEAR_STATE:
4504 	case PACKET3_INDEX_BUFFER_SIZE:
4505 	case PACKET3_DISPATCH_DIRECT:
4506 	case PACKET3_DISPATCH_INDIRECT:
4507 	case PACKET3_ALLOC_GDS:
4508 	case PACKET3_WRITE_GDS_RAM:
4509 	case PACKET3_ATOMIC_GDS:
4510 	case PACKET3_ATOMIC:
4511 	case PACKET3_OCCLUSION_QUERY:
4512 	case PACKET3_SET_PREDICATION:
4513 	case PACKET3_COND_EXEC:
4514 	case PACKET3_PRED_EXEC:
4515 	case PACKET3_DRAW_INDIRECT:
4516 	case PACKET3_DRAW_INDEX_INDIRECT:
4517 	case PACKET3_INDEX_BASE:
4518 	case PACKET3_DRAW_INDEX_2:
4519 	case PACKET3_CONTEXT_CONTROL:
4520 	case PACKET3_INDEX_TYPE:
4521 	case PACKET3_DRAW_INDIRECT_MULTI:
4522 	case PACKET3_DRAW_INDEX_AUTO:
4523 	case PACKET3_DRAW_INDEX_IMMD:
4524 	case PACKET3_NUM_INSTANCES:
4525 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4526 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4527 	case PACKET3_DRAW_INDEX_OFFSET_2:
4528 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4529 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4530 	case PACKET3_MPEG_INDEX:
4531 	case PACKET3_WAIT_REG_MEM:
4532 	case PACKET3_MEM_WRITE:
4533 	case PACKET3_PFP_SYNC_ME:
4534 	case PACKET3_SURFACE_SYNC:
4535 	case PACKET3_EVENT_WRITE:
4536 	case PACKET3_EVENT_WRITE_EOP:
4537 	case PACKET3_EVENT_WRITE_EOS:
4538 	case PACKET3_SET_CONTEXT_REG:
4539 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4540 	case PACKET3_SET_SH_REG:
4541 	case PACKET3_SET_SH_REG_OFFSET:
4542 	case PACKET3_INCREMENT_DE_COUNTER:
4543 	case PACKET3_WAIT_ON_CE_COUNTER:
4544 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4545 	case PACKET3_ME_WRITE:
4546 		break;
4547 	case PACKET3_COPY_DATA:
4548 		if ((idx_value & 0xf00) == 0) {
4549 			reg = ib[idx + 3] * 4;
4550 			if (!si_vm_reg_valid(reg))
4551 				return -EINVAL;
4552 		}
4553 		break;
4554 	case PACKET3_WRITE_DATA:
4555 		if ((idx_value & 0xf00) == 0) {
4556 			start_reg = ib[idx + 1] * 4;
4557 			if (idx_value & 0x10000) {
4558 				if (!si_vm_reg_valid(start_reg))
4559 					return -EINVAL;
4560 			} else {
4561 				for (i = 0; i < (pkt->count - 2); i++) {
4562 					reg = start_reg + (4 * i);
4563 					if (!si_vm_reg_valid(reg))
4564 						return -EINVAL;
4565 				}
4566 			}
4567 		}
4568 		break;
4569 	case PACKET3_COND_WRITE:
4570 		if (idx_value & 0x100) {
4571 			reg = ib[idx + 5] * 4;
4572 			if (!si_vm_reg_valid(reg))
4573 				return -EINVAL;
4574 		}
4575 		break;
4576 	case PACKET3_COPY_DW:
4577 		if (idx_value & 0x2) {
4578 			reg = ib[idx + 3] * 4;
4579 			if (!si_vm_reg_valid(reg))
4580 				return -EINVAL;
4581 		}
4582 		break;
4583 	case PACKET3_SET_CONFIG_REG:
4584 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4585 		end_reg = 4 * pkt->count + start_reg - 4;
4586 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4587 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4588 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4589 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4590 			return -EINVAL;
4591 		}
4592 		for (i = 0; i < pkt->count; i++) {
4593 			reg = start_reg + (4 * i);
4594 			if (!si_vm_reg_valid(reg))
4595 				return -EINVAL;
4596 		}
4597 		break;
4598 	case PACKET3_CP_DMA:
4599 		r = si_vm_packet3_cp_dma_check(ib, idx);
4600 		if (r)
4601 			return r;
4602 		break;
4603 	default:
4604 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4605 		return -EINVAL;
4606 	}
4607 	return 0;
4608 }
4609 
4610 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4611 				       u32 *ib, struct radeon_cs_packet *pkt)
4612 {
4613 	int r;
4614 	u32 idx = pkt->idx + 1;
4615 	u32 idx_value = ib[idx];
4616 	u32 start_reg, reg, i;
4617 
4618 	switch (pkt->opcode) {
4619 	case PACKET3_NOP:
4620 	case PACKET3_SET_BASE:
4621 	case PACKET3_CLEAR_STATE:
4622 	case PACKET3_DISPATCH_DIRECT:
4623 	case PACKET3_DISPATCH_INDIRECT:
4624 	case PACKET3_ALLOC_GDS:
4625 	case PACKET3_WRITE_GDS_RAM:
4626 	case PACKET3_ATOMIC_GDS:
4627 	case PACKET3_ATOMIC:
4628 	case PACKET3_OCCLUSION_QUERY:
4629 	case PACKET3_SET_PREDICATION:
4630 	case PACKET3_COND_EXEC:
4631 	case PACKET3_PRED_EXEC:
4632 	case PACKET3_CONTEXT_CONTROL:
4633 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4634 	case PACKET3_WAIT_REG_MEM:
4635 	case PACKET3_MEM_WRITE:
4636 	case PACKET3_PFP_SYNC_ME:
4637 	case PACKET3_SURFACE_SYNC:
4638 	case PACKET3_EVENT_WRITE:
4639 	case PACKET3_EVENT_WRITE_EOP:
4640 	case PACKET3_EVENT_WRITE_EOS:
4641 	case PACKET3_SET_CONTEXT_REG:
4642 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4643 	case PACKET3_SET_SH_REG:
4644 	case PACKET3_SET_SH_REG_OFFSET:
4645 	case PACKET3_INCREMENT_DE_COUNTER:
4646 	case PACKET3_WAIT_ON_CE_COUNTER:
4647 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4648 	case PACKET3_ME_WRITE:
4649 		break;
4650 	case PACKET3_COPY_DATA:
4651 		if ((idx_value & 0xf00) == 0) {
4652 			reg = ib[idx + 3] * 4;
4653 			if (!si_vm_reg_valid(reg))
4654 				return -EINVAL;
4655 		}
4656 		break;
4657 	case PACKET3_WRITE_DATA:
4658 		if ((idx_value & 0xf00) == 0) {
4659 			start_reg = ib[idx + 1] * 4;
4660 			if (idx_value & 0x10000) {
4661 				if (!si_vm_reg_valid(start_reg))
4662 					return -EINVAL;
4663 			} else {
4664 				for (i = 0; i < (pkt->count - 2); i++) {
4665 					reg = start_reg + (4 * i);
4666 					if (!si_vm_reg_valid(reg))
4667 						return -EINVAL;
4668 				}
4669 			}
4670 		}
4671 		break;
4672 	case PACKET3_COND_WRITE:
4673 		if (idx_value & 0x100) {
4674 			reg = ib[idx + 5] * 4;
4675 			if (!si_vm_reg_valid(reg))
4676 				return -EINVAL;
4677 		}
4678 		break;
4679 	case PACKET3_COPY_DW:
4680 		if (idx_value & 0x2) {
4681 			reg = ib[idx + 3] * 4;
4682 			if (!si_vm_reg_valid(reg))
4683 				return -EINVAL;
4684 		}
4685 		break;
4686 	case PACKET3_CP_DMA:
4687 		r = si_vm_packet3_cp_dma_check(ib, idx);
4688 		if (r)
4689 			return r;
4690 		break;
4691 	default:
4692 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4693 		return -EINVAL;
4694 	}
4695 	return 0;
4696 }
4697 
4698 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4699 {
4700 	int ret = 0;
4701 	u32 idx = 0, i;
4702 	struct radeon_cs_packet pkt;
4703 
4704 	do {
4705 		pkt.idx = idx;
4706 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4707 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4708 		pkt.one_reg_wr = 0;
4709 		switch (pkt.type) {
4710 		case RADEON_PACKET_TYPE0:
4711 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4712 			ret = -EINVAL;
4713 			break;
4714 		case RADEON_PACKET_TYPE2:
4715 			idx += 1;
4716 			break;
4717 		case RADEON_PACKET_TYPE3:
4718 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4719 			if (ib->is_const_ib)
4720 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4721 			else {
4722 				switch (ib->ring) {
4723 				case RADEON_RING_TYPE_GFX_INDEX:
4724 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4725 					break;
4726 				case CAYMAN_RING_TYPE_CP1_INDEX:
4727 				case CAYMAN_RING_TYPE_CP2_INDEX:
4728 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4729 					break;
4730 				default:
4731 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4732 					ret = -EINVAL;
4733 					break;
4734 				}
4735 			}
4736 			idx += pkt.count + 2;
4737 			break;
4738 		default:
4739 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4740 			ret = -EINVAL;
4741 			break;
4742 		}
4743 		if (ret) {
4744 			for (i = 0; i < ib->length_dw; i++) {
4745 				if (i == idx)
4746 					printk("\t0x%08x <---\n", ib->ptr[i]);
4747 				else
4748 					printk("\t0x%08x\n", ib->ptr[i]);
4749 			}
4750 			break;
4751 		}
4752 	} while (idx < ib->length_dw);
4753 
4754 	return ret;
4755 }
4756 
4757 /*
4758  * vm
4759  */
4760 int si_vm_init(struct radeon_device *rdev)
4761 {
4762 	/* number of VMs */
4763 	rdev->vm_manager.nvm = 16;
4764 	/* base offset of vram pages */
4765 	rdev->vm_manager.vram_base_offset = 0;
4766 
4767 	return 0;
4768 }
4769 
4770 void si_vm_fini(struct radeon_device *rdev)
4771 {
4772 }
4773 
4774 /**
4775  * si_vm_decode_fault - print human readable fault info
4776  *
4777  * @rdev: radeon_device pointer
4778  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4779  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4780  *
4781  * Print human readable fault information (SI).
4782  */
4783 static void si_vm_decode_fault(struct radeon_device *rdev,
4784 			       u32 status, u32 addr)
4785 {
4786 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4787 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4788 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4789 	const char *block;
4790 
4791 	if (rdev->family == CHIP_TAHITI) {
4792 		switch (mc_id) {
4793 		case 160:
4794 		case 144:
4795 		case 96:
4796 		case 80:
4797 		case 224:
4798 		case 208:
4799 		case 32:
4800 		case 16:
4801 			block = "CB";
4802 			break;
4803 		case 161:
4804 		case 145:
4805 		case 97:
4806 		case 81:
4807 		case 225:
4808 		case 209:
4809 		case 33:
4810 		case 17:
4811 			block = "CB_FMASK";
4812 			break;
4813 		case 162:
4814 		case 146:
4815 		case 98:
4816 		case 82:
4817 		case 226:
4818 		case 210:
4819 		case 34:
4820 		case 18:
4821 			block = "CB_CMASK";
4822 			break;
4823 		case 163:
4824 		case 147:
4825 		case 99:
4826 		case 83:
4827 		case 227:
4828 		case 211:
4829 		case 35:
4830 		case 19:
4831 			block = "CB_IMMED";
4832 			break;
4833 		case 164:
4834 		case 148:
4835 		case 100:
4836 		case 84:
4837 		case 228:
4838 		case 212:
4839 		case 36:
4840 		case 20:
4841 			block = "DB";
4842 			break;
4843 		case 165:
4844 		case 149:
4845 		case 101:
4846 		case 85:
4847 		case 229:
4848 		case 213:
4849 		case 37:
4850 		case 21:
4851 			block = "DB_HTILE";
4852 			break;
4853 		case 167:
4854 		case 151:
4855 		case 103:
4856 		case 87:
4857 		case 231:
4858 		case 215:
4859 		case 39:
4860 		case 23:
4861 			block = "DB_STEN";
4862 			break;
4863 		case 72:
4864 		case 68:
4865 		case 64:
4866 		case 8:
4867 		case 4:
4868 		case 0:
4869 		case 136:
4870 		case 132:
4871 		case 128:
4872 		case 200:
4873 		case 196:
4874 		case 192:
4875 			block = "TC";
4876 			break;
4877 		case 112:
4878 		case 48:
4879 			block = "CP";
4880 			break;
4881 		case 49:
4882 		case 177:
4883 		case 50:
4884 		case 178:
4885 			block = "SH";
4886 			break;
4887 		case 53:
4888 		case 190:
4889 			block = "VGT";
4890 			break;
4891 		case 117:
4892 			block = "IH";
4893 			break;
4894 		case 51:
4895 		case 115:
4896 			block = "RLC";
4897 			break;
4898 		case 119:
4899 		case 183:
4900 			block = "DMA0";
4901 			break;
4902 		case 61:
4903 			block = "DMA1";
4904 			break;
4905 		case 248:
4906 		case 120:
4907 			block = "HDP";
4908 			break;
4909 		default:
4910 			block = "unknown";
4911 			break;
4912 		}
4913 	} else {
4914 		switch (mc_id) {
4915 		case 32:
4916 		case 16:
4917 		case 96:
4918 		case 80:
4919 		case 160:
4920 		case 144:
4921 		case 224:
4922 		case 208:
4923 			block = "CB";
4924 			break;
4925 		case 33:
4926 		case 17:
4927 		case 97:
4928 		case 81:
4929 		case 161:
4930 		case 145:
4931 		case 225:
4932 		case 209:
4933 			block = "CB_FMASK";
4934 			break;
4935 		case 34:
4936 		case 18:
4937 		case 98:
4938 		case 82:
4939 		case 162:
4940 		case 146:
4941 		case 226:
4942 		case 210:
4943 			block = "CB_CMASK";
4944 			break;
4945 		case 35:
4946 		case 19:
4947 		case 99:
4948 		case 83:
4949 		case 163:
4950 		case 147:
4951 		case 227:
4952 		case 211:
4953 			block = "CB_IMMED";
4954 			break;
4955 		case 36:
4956 		case 20:
4957 		case 100:
4958 		case 84:
4959 		case 164:
4960 		case 148:
4961 		case 228:
4962 		case 212:
4963 			block = "DB";
4964 			break;
4965 		case 37:
4966 		case 21:
4967 		case 101:
4968 		case 85:
4969 		case 165:
4970 		case 149:
4971 		case 229:
4972 		case 213:
4973 			block = "DB_HTILE";
4974 			break;
4975 		case 39:
4976 		case 23:
4977 		case 103:
4978 		case 87:
4979 		case 167:
4980 		case 151:
4981 		case 231:
4982 		case 215:
4983 			block = "DB_STEN";
4984 			break;
4985 		case 72:
4986 		case 68:
4987 		case 8:
4988 		case 4:
4989 		case 136:
4990 		case 132:
4991 		case 200:
4992 		case 196:
4993 			block = "TC";
4994 			break;
4995 		case 112:
4996 		case 48:
4997 			block = "CP";
4998 			break;
4999 		case 49:
5000 		case 177:
5001 		case 50:
5002 		case 178:
5003 			block = "SH";
5004 			break;
5005 		case 53:
5006 			block = "VGT";
5007 			break;
5008 		case 117:
5009 			block = "IH";
5010 			break;
5011 		case 51:
5012 		case 115:
5013 			block = "RLC";
5014 			break;
5015 		case 119:
5016 		case 183:
5017 			block = "DMA0";
5018 			break;
5019 		case 61:
5020 			block = "DMA1";
5021 			break;
5022 		case 248:
5023 		case 120:
5024 			block = "HDP";
5025 			break;
5026 		default:
5027 			block = "unknown";
5028 			break;
5029 		}
5030 	}
5031 
5032 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5033 	       protections, vmid, addr,
5034 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5035 	       block, mc_id);
5036 }
5037 
5038 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5039 		 unsigned vm_id, uint64_t pd_addr)
5040 {
5041 	/* write new base address */
5042 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5043 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5044 				 WRITE_DATA_DST_SEL(0)));
5045 
5046 	if (vm_id < 8) {
5047 		radeon_ring_write(ring,
5048 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5049 	} else {
5050 		radeon_ring_write(ring,
5051 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5052 	}
5053 	radeon_ring_write(ring, 0);
5054 	radeon_ring_write(ring, pd_addr >> 12);
5055 
5056 	/* flush hdp cache */
5057 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5058 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5059 				 WRITE_DATA_DST_SEL(0)));
5060 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5061 	radeon_ring_write(ring, 0);
5062 	radeon_ring_write(ring, 0x1);
5063 
5064 	/* bits 0-15 are the VM contexts 0-15 */
5065 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5066 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5067 				 WRITE_DATA_DST_SEL(0)));
5068 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5069 	radeon_ring_write(ring, 0);
5070 	radeon_ring_write(ring, 1 << vm_id);
5071 
5072 	/* wait for the invalidate to complete */
5073 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5074 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5075 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5076 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5077 	radeon_ring_write(ring, 0);
5078 	radeon_ring_write(ring, 0); /* ref */
5079 	radeon_ring_write(ring, 0); /* mask */
5080 	radeon_ring_write(ring, 0x20); /* poll interval */
5081 
5082 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5083 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5084 	radeon_ring_write(ring, 0x0);
5085 }
5086 
5087 /*
5088  *  Power and clock gating
5089  */
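/* Poll (up to rdev->usec_timeout) until both RLC SERDES master busy
 * registers report idle.
 */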
5090 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5091 {
5092 	int i;
5093 
5094 	for (i = 0; i < rdev->usec_timeout; i++) {
5095 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5096 			break;
5097 		udelay(1);
5098 	}
5099 
5100 	for (i = 0; i < rdev->usec_timeout; i++) {
5101 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5102 			break;
5103 		udelay(1);
5104 	}
5105 }
5106 
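/* Enable/disable the context busy/empty (GUI idle) interrupts on
 * ring 0.  When disabling, read back a gfx register and wait until
 * the RLC reports the gfx block clocked and powered up.
 */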
5107 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5108 					 bool enable)
5109 {
5110 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5111 	u32 mask;
5112 	int i;
5113 
5114 	if (enable)
5115 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5116 	else
5117 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5118 	WREG32(CP_INT_CNTL_RING0, tmp);
5119 
5120 	if (!enable) {
5121 		/* read a gfx register */
5122 		tmp = RREG32(DB_DEPTH_INFO);
5123 
5124 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5125 		for (i = 0; i < rdev->usec_timeout; i++) {
5126 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5127 				break;
5128 			udelay(1);
5129 		}
5130 	}
5131 }
5132 
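/* Select the UVD dynamic clock mode.  With sw_mode the divider bits
 * in UVD_CGC_CTRL are cleared and control moves to UVD_CGC_CTRL2
 * (DYN_OR_EN | DYN_RR_EN); otherwise those bits are set and
 * UVD_CGC_CTRL2 is cleared.
 */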
5133 static void si_set_uvd_dcm(struct radeon_device *rdev,
5134 			   bool sw_mode)
5135 {
5136 	u32 tmp, tmp2;
5137 
5138 	tmp = RREG32(UVD_CGC_CTRL);
5139 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5140 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5141 
5142 	if (sw_mode) {
5143 		tmp &= ~0x7ffff800;
5144 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5145 	} else {
5146 		tmp |= 0x7ffff800;
5147 		tmp2 = 0;
5148 	}
5149 
5150 	WREG32(UVD_CGC_CTRL, tmp);
5151 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5152 }
5153 
5154 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5155 {
5156 	bool hw_mode = true;
5157 
5158 	if (hw_mode) {
5159 		si_set_uvd_dcm(rdev, false);
5160 	} else {
5161 		u32 tmp = RREG32(UVD_CGC_CTRL);
5162 		tmp &= ~DCM;
5163 		WREG32(UVD_CGC_CTRL, tmp);
5164 	}
5165 }
5166 
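/* Disable the RLC if it is running and wait for its SERDES to go
 * idle.  Returns the previous RLC_CNTL value so the caller can
 * restore it afterwards with si_update_rlc().
 */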
5167 static u32 si_halt_rlc(struct radeon_device *rdev)
5168 {
5169 	u32 data, orig;
5170 
5171 	orig = data = RREG32(RLC_CNTL);
5172 
5173 	if (data & RLC_ENABLE) {
5174 		data &= ~RLC_ENABLE;
5175 		WREG32(RLC_CNTL, data);
5176 
5177 		si_wait_for_rlc_serdes(rdev);
5178 	}
5179 
5180 	return orig;
5181 }
5182 
5183 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5184 {
5185 	u32 tmp;
5186 
5187 	tmp = RREG32(RLC_CNTL);
5188 	if (tmp != rlc)
5189 		WREG32(RLC_CNTL, rlc);
5190 }
5191 
5192 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5193 {
5194 	u32 data, orig;
5195 
5196 	orig = data = RREG32(DMA_PG);
5197 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5198 		data |= PG_CNTL_ENABLE;
5199 	else
5200 		data &= ~PG_CNTL_ENABLE;
5201 	if (orig != data)
5202 		WREG32(DMA_PG, data);
5203 }
5204 
5205 static void si_init_dma_pg(struct radeon_device *rdev)
5206 {
5207 	u32 tmp;
5208 
5209 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5210 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5211 
5212 	for (tmp = 0; tmp < 5; tmp++)
5213 		WREG32(DMA_PGFSM_WRITE, 0);
5214 }
5215 
5216 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5217 			       bool enable)
5218 {
5219 	u32 tmp;
5220 
5221 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5222 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5223 		WREG32(RLC_TTOP_D, tmp);
5224 
5225 		tmp = RREG32(RLC_PG_CNTL);
5226 		tmp |= GFX_PG_ENABLE;
5227 		WREG32(RLC_PG_CNTL, tmp);
5228 
5229 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5230 		tmp |= AUTO_PG_EN;
5231 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5232 	} else {
5233 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5234 		tmp &= ~AUTO_PG_EN;
5235 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5236 
5237 		tmp = RREG32(DB_RENDER_CONTROL);
5238 	}
5239 }
5240 
5241 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5242 {
5243 	u32 tmp;
5244 
5245 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5246 
5247 	tmp = RREG32(RLC_PG_CNTL);
5248 	tmp |= GFX_PG_SRC;
5249 	WREG32(RLC_PG_CNTL, tmp);
5250 
5251 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5252 
5253 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5254 
5255 	tmp &= ~GRBM_REG_SGIT_MASK;
5256 	tmp |= GRBM_REG_SGIT(0x700);
5257 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5258 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5259 }
5260 
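/* Return a bitmap of the active CUs (neither hw- nor user-disabled)
 * for the given shader engine / shader array, limited to
 * max_cu_per_sh bits.
 */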
5261 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5262 {
5263 	u32 mask = 0, tmp, tmp1;
5264 	int i;
5265 
5266 	si_select_se_sh(rdev, se, sh);
5267 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5268 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5269 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5270 
5271 	tmp &= 0xffff0000;
5272 
5273 	tmp |= tmp1;
5274 	tmp >>= 16;
5275 
5276 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5277 		mask <<= 1;
5278 		mask |= 1;
5279 	}
5280 
5281 	return (~tmp) & mask;
5282 }
5283 
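/* Build the RLC always-on CU mask: keep at most two active CUs per
 * shader array always on, and program the total number of active
 * CUs into RLC_MAX_PG_CU.
 */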
5284 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5285 {
5286 	u32 i, j, k, active_cu_number = 0;
5287 	u32 mask, counter, cu_bitmap;
5288 	u32 tmp = 0;
5289 
5290 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5291 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5292 			mask = 1;
5293 			cu_bitmap = 0;
5294 			counter  = 0;
5295 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5296 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5297 					if (counter < 2)
5298 						cu_bitmap |= mask;
5299 					counter++;
5300 				}
5301 				mask <<= 1;
5302 			}
5303 
5304 			active_cu_number += counter;
5305 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5306 		}
5307 	}
5308 
5309 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5310 
5311 	tmp = RREG32(RLC_MAX_PG_CU);
5312 	tmp &= ~MAX_PU_CU_MASK;
5313 	tmp |= MAX_PU_CU(active_cu_number);
5314 	WREG32(RLC_MAX_PG_CU, tmp);
5315 }
5316 
5317 static void si_enable_cgcg(struct radeon_device *rdev,
5318 			   bool enable)
5319 {
5320 	u32 data, orig, tmp;
5321 
5322 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5323 
5324 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5325 		si_enable_gui_idle_interrupt(rdev, true);
5326 
5327 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5328 
5329 		tmp = si_halt_rlc(rdev);
5330 
5331 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5332 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5333 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5334 
5335 		si_wait_for_rlc_serdes(rdev);
5336 
5337 		si_update_rlc(rdev, tmp);
5338 
5339 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5340 
5341 		data |= CGCG_EN | CGLS_EN;
5342 	} else {
5343 		si_enable_gui_idle_interrupt(rdev, false);
5344 
5345 		RREG32(CB_CGTT_SCLK_CTRL);
5346 		RREG32(CB_CGTT_SCLK_CTRL);
5347 		RREG32(CB_CGTT_SCLK_CTRL);
5348 		RREG32(CB_CGTT_SCLK_CTRL);
5349 
5350 		data &= ~(CGCG_EN | CGLS_EN);
5351 	}
5352 
5353 	if (orig != data)
5354 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5355 }
5356 
5357 static void si_enable_mgcg(struct radeon_device *rdev,
5358 			   bool enable)
5359 {
5360 	u32 data, orig, tmp = 0;
5361 
5362 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5363 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5364 		data = 0x96940200;
5365 		if (orig != data)
5366 			WREG32(CGTS_SM_CTRL_REG, data);
5367 
5368 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5369 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5370 			data |= CP_MEM_LS_EN;
5371 			if (orig != data)
5372 				WREG32(CP_MEM_SLP_CNTL, data);
5373 		}
5374 
5375 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5376 		data &= 0xffffffc0;
5377 		if (orig != data)
5378 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5379 
5380 		tmp = si_halt_rlc(rdev);
5381 
5382 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5383 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5384 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5385 
5386 		si_update_rlc(rdev, tmp);
5387 	} else {
5388 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5389 		data |= 0x00000003;
5390 		if (orig != data)
5391 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5392 
5393 		data = RREG32(CP_MEM_SLP_CNTL);
5394 		if (data & CP_MEM_LS_EN) {
5395 			data &= ~CP_MEM_LS_EN;
5396 			WREG32(CP_MEM_SLP_CNTL, data);
5397 		}
5398 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5399 		data |= LS_OVERRIDE | OVERRIDE;
5400 		if (orig != data)
5401 			WREG32(CGTS_SM_CTRL_REG, data);
5402 
5403 		tmp = si_halt_rlc(rdev);
5404 
5405 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5406 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5407 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5408 
5409 		si_update_rlc(rdev, tmp);
5410 	}
5411 }
5412 
5413 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5414 			       bool enable)
5415 {
5416 	u32 orig, data, tmp;
5417 
5418 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5419 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5420 		tmp |= 0x3fff;
5421 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5422 
5423 		orig = data = RREG32(UVD_CGC_CTRL);
5424 		data |= DCM;
5425 		if (orig != data)
5426 			WREG32(UVD_CGC_CTRL, data);
5427 
5428 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5429 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5430 	} else {
5431 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5432 		tmp &= ~0x3fff;
5433 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5434 
5435 		orig = data = RREG32(UVD_CGC_CTRL);
5436 		data &= ~DCM;
5437 		if (orig != data)
5438 			WREG32(UVD_CGC_CTRL, data);
5439 
5440 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5441 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5442 	}
5443 }
5444 
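/* MC/VM registers carrying both the clock gating (MC_CG_ENABLE) and
 * light sleep (MC_LS_ENABLE) control bits toggled below.
 */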
5445 static const u32 mc_cg_registers[] =
5446 {
5447 	MC_HUB_MISC_HUB_CG,
5448 	MC_HUB_MISC_SIP_CG,
5449 	MC_HUB_MISC_VM_CG,
5450 	MC_XPB_CLK_GAT,
5451 	ATC_MISC_CG,
5452 	MC_CITF_MISC_WR_CG,
5453 	MC_CITF_MISC_RD_CG,
5454 	MC_CITF_MISC_VM_CG,
5455 	VM_L2_CG,
5456 };
5457 
5458 static void si_enable_mc_ls(struct radeon_device *rdev,
5459 			    bool enable)
5460 {
5461 	int i;
5462 	u32 orig, data;
5463 
5464 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5465 		orig = data = RREG32(mc_cg_registers[i]);
5466 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5467 			data |= MC_LS_ENABLE;
5468 		else
5469 			data &= ~MC_LS_ENABLE;
5470 		if (data != orig)
5471 			WREG32(mc_cg_registers[i], data);
5472 	}
5473 }
5474 
5475 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5476 			       bool enable)
5477 {
5478 	int i;
5479 	u32 orig, data;
5480 
5481 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5482 		orig = data = RREG32(mc_cg_registers[i]);
5483 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5484 			data |= MC_CG_ENABLE;
5485 		else
5486 			data &= ~MC_CG_ENABLE;
5487 		if (data != orig)
5488 			WREG32(mc_cg_registers[i], data);
5489 	}
5490 }
5491 
5492 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5493 			       bool enable)
5494 {
5495 	u32 orig, data, offset;
5496 	int i;
5497 
5498 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5499 		for (i = 0; i < 2; i++) {
5500 			if (i == 0)
5501 				offset = DMA0_REGISTER_OFFSET;
5502 			else
5503 				offset = DMA1_REGISTER_OFFSET;
5504 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5505 			data &= ~MEM_POWER_OVERRIDE;
5506 			if (data != orig)
5507 				WREG32(DMA_POWER_CNTL + offset, data);
5508 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5509 		}
5510 	} else {
5511 		for (i = 0; i < 2; i++) {
5512 			if (i == 0)
5513 				offset = DMA0_REGISTER_OFFSET;
5514 			else
5515 				offset = DMA1_REGISTER_OFFSET;
5516 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5517 			data |= MEM_POWER_OVERRIDE;
5518 			if (data != orig)
5519 				WREG32(DMA_POWER_CNTL + offset, data);
5520 
5521 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5522 			data = 0xff000000;
5523 			if (data != orig)
5524 				WREG32(DMA_CLK_CTRL + offset, data);
5525 		}
5526 	}
5527 }
5528 
5529 static void si_enable_bif_mgls(struct radeon_device *rdev,
5530 			       bool enable)
5531 {
5532 	u32 orig, data;
5533 
5534 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5535 
5536 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5537 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5538 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5539 	else
5540 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5541 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5542 
5543 	if (orig != data)
5544 		WREG32_PCIE(PCIE_CNTL2, data);
5545 }
5546 
5547 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5548 			       bool enable)
5549 {
5550 	u32 orig, data;
5551 
5552 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5553 
5554 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5555 		data &= ~CLOCK_GATING_DIS;
5556 	else
5557 		data |= CLOCK_GATING_DIS;
5558 
5559 	if (orig != data)
5560 		WREG32(HDP_HOST_PATH_CNTL, data);
5561 }
5562 
5563 static void si_enable_hdp_ls(struct radeon_device *rdev,
5564 			     bool enable)
5565 {
5566 	u32 orig, data;
5567 
5568 	orig = data = RREG32(HDP_MEM_POWER_LS);
5569 
5570 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5571 		data |= HDP_LS_ENABLE;
5572 	else
5573 		data &= ~HDP_LS_ENABLE;
5574 
5575 	if (orig != data)
5576 		WREG32(HDP_MEM_POWER_LS, data);
5577 }
5578 
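/* Enable or disable clock gating for the blocks selected in @block.
 * For gfx the ordering matters: coarse grained gating (CGCG) is
 * enabled after and disabled before medium grained gating (MGCG).
 */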
5579 static void si_update_cg(struct radeon_device *rdev,
5580 			 u32 block, bool enable)
5581 {
5582 	if (block & RADEON_CG_BLOCK_GFX) {
5583 		si_enable_gui_idle_interrupt(rdev, false);
5584 		/* order matters! */
5585 		if (enable) {
5586 			si_enable_mgcg(rdev, true);
5587 			si_enable_cgcg(rdev, true);
5588 		} else {
5589 			si_enable_cgcg(rdev, false);
5590 			si_enable_mgcg(rdev, false);
5591 		}
5592 		si_enable_gui_idle_interrupt(rdev, true);
5593 	}
5594 
5595 	if (block & RADEON_CG_BLOCK_MC) {
5596 		si_enable_mc_mgcg(rdev, enable);
5597 		si_enable_mc_ls(rdev, enable);
5598 	}
5599 
5600 	if (block & RADEON_CG_BLOCK_SDMA) {
5601 		si_enable_dma_mgcg(rdev, enable);
5602 	}
5603 
5604 	if (block & RADEON_CG_BLOCK_BIF) {
5605 		si_enable_bif_mgls(rdev, enable);
5606 	}
5607 
5608 	if (block & RADEON_CG_BLOCK_UVD) {
5609 		if (rdev->has_uvd) {
5610 			si_enable_uvd_mgcg(rdev, enable);
5611 		}
5612 	}
5613 
5614 	if (block & RADEON_CG_BLOCK_HDP) {
5615 		si_enable_hdp_mgcg(rdev, enable);
5616 		si_enable_hdp_ls(rdev, enable);
5617 	}
5618 }
5619 
5620 static void si_init_cg(struct radeon_device *rdev)
5621 {
5622 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5623 			    RADEON_CG_BLOCK_MC |
5624 			    RADEON_CG_BLOCK_SDMA |
5625 			    RADEON_CG_BLOCK_BIF |
5626 			    RADEON_CG_BLOCK_HDP), true);
5627 	if (rdev->has_uvd) {
5628 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5629 		si_init_uvd_internal_cg(rdev);
5630 	}
5631 }
5632 
5633 static void si_fini_cg(struct radeon_device *rdev)
5634 {
5635 	if (rdev->has_uvd) {
5636 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5637 	}
5638 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5639 			    RADEON_CG_BLOCK_MC |
5640 			    RADEON_CG_BLOCK_SDMA |
5641 			    RADEON_CG_BLOCK_BIF |
5642 			    RADEON_CG_BLOCK_HDP), false);
5643 }
5644 
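/* Return the size, in dwords, of the clear state buffer that
 * si_get_csb_buffer() will emit for this chip's cs_data.
 */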
5645 u32 si_get_csb_size(struct radeon_device *rdev)
5646 {
5647 	u32 count = 0;
5648 	const struct cs_section_def *sect = NULL;
5649 	const struct cs_extent_def *ext = NULL;
5650 
5651 	if (rdev->rlc.cs_data == NULL)
5652 		return 0;
5653 
5654 	/* begin clear state */
5655 	count += 2;
5656 	/* context control state */
5657 	count += 3;
5658 
5659 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5660 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5661 			if (sect->id == SECT_CONTEXT)
5662 				count += 2 + ext->reg_count;
5663 			else
5664 				return 0;
5665 		}
5666 	}
5667 	/* pa_sc_raster_config */
5668 	count += 3;
5669 	/* end clear state */
5670 	count += 2;
5671 	/* clear state */
5672 	count += 2;
5673 
5674 	return count;
5675 }
5676 
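/* Fill @buffer with the clear state indirect buffer: preamble
 * begin/end, context control, the SECT_CONTEXT register extents, a
 * per-family PA_SC_RASTER_CONFIG value and a final CLEAR_STATE
 * packet.  The buffer must be sized with si_get_csb_size().
 */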
5677 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5678 {
5679 	u32 count = 0, i;
5680 	const struct cs_section_def *sect = NULL;
5681 	const struct cs_extent_def *ext = NULL;
5682 
5683 	if (rdev->rlc.cs_data == NULL)
5684 		return;
5685 	if (buffer == NULL)
5686 		return;
5687 
5688 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5689 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5690 
5691 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5692 	buffer[count++] = cpu_to_le32(0x80000000);
5693 	buffer[count++] = cpu_to_le32(0x80000000);
5694 
5695 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5696 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5697 			if (sect->id == SECT_CONTEXT) {
5698 				buffer[count++] =
5699 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5700 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5701 				for (i = 0; i < ext->reg_count; i++)
5702 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5703 			} else {
5704 				return;
5705 			}
5706 		}
5707 	}
5708 
5709 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5710 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5711 	switch (rdev->family) {
5712 	case CHIP_TAHITI:
5713 	case CHIP_PITCAIRN:
5714 		buffer[count++] = cpu_to_le32(0x2a00126a);
5715 		break;
5716 	case CHIP_VERDE:
5717 		buffer[count++] = cpu_to_le32(0x0000124a);
5718 		break;
5719 	case CHIP_OLAND:
5720 		buffer[count++] = cpu_to_le32(0x00000082);
5721 		break;
5722 	case CHIP_HAINAN:
5723 		buffer[count++] = cpu_to_le32(0x00000000);
5724 		break;
5725 	default:
5726 		buffer[count++] = cpu_to_le32(0x00000000);
5727 		break;
5728 	}
5729 
5730 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5731 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5732 
5733 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5734 	buffer[count++] = cpu_to_le32(0);
5735 }
5736 
5737 static void si_init_pg(struct radeon_device *rdev)
5738 {
5739 	if (rdev->pg_flags) {
5740 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5741 			si_init_dma_pg(rdev);
5742 		}
5743 		si_init_ao_cu_mask(rdev);
5744 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5745 			si_init_gfx_cgpg(rdev);
5746 		} else {
5747 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5748 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5749 		}
5750 		si_enable_dma_pg(rdev, true);
5751 		si_enable_gfx_cgpg(rdev, true);
5752 	} else {
5753 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5754 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5755 	}
5756 }
5757 
5758 static void si_fini_pg(struct radeon_device *rdev)
5759 {
5760 	if (rdev->pg_flags) {
5761 		si_enable_dma_pg(rdev, false);
5762 		si_enable_gfx_cgpg(rdev, false);
5763 	}
5764 }
5765 
5766 /*
5767  * RLC
5768  */
5769 void si_rlc_reset(struct radeon_device *rdev)
5770 {
5771 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5772 
5773 	tmp |= SOFT_RESET_RLC;
5774 	WREG32(GRBM_SOFT_RESET, tmp);
5775 	udelay(50);
5776 	tmp &= ~SOFT_RESET_RLC;
5777 	WREG32(GRBM_SOFT_RESET, tmp);
5778 	udelay(50);
5779 }
5780 
5781 static void si_rlc_stop(struct radeon_device *rdev)
5782 {
5783 	WREG32(RLC_CNTL, 0);
5784 
5785 	si_enable_gui_idle_interrupt(rdev, false);
5786 
5787 	si_wait_for_rlc_serdes(rdev);
5788 }
5789 
5790 static void si_rlc_start(struct radeon_device *rdev)
5791 {
5792 	WREG32(RLC_CNTL, RLC_ENABLE);
5793 
5794 	si_enable_gui_idle_interrupt(rdev, true);
5795 
5796 	udelay(50);
5797 }
5798 
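/* Load balancing per watt (LBPW) is only enabled on boards with
 * DDR3 vram; MC_SEQ_MISC0 bits 31:28 read back 0xb for DDR3.
 */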
5799 static bool si_lbpw_supported(struct radeon_device *rdev)
5800 {
5801 	u32 tmp;
5802 
5803 	/* Enable LBPW only for DDR3 */
5804 	tmp = RREG32(MC_SEQ_MISC0);
5805 	if ((tmp & 0xF0000000) == 0xB0000000)
5806 		return true;
5807 	return false;
5808 }
5809 
5810 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5811 {
5812 	u32 tmp;
5813 
5814 	tmp = RREG32(RLC_LB_CNTL);
5815 	if (enable)
5816 		tmp |= LOAD_BALANCE_ENABLE;
5817 	else
5818 		tmp &= ~LOAD_BALANCE_ENABLE;
5819 	WREG32(RLC_LB_CNTL, tmp);
5820 
5821 	if (!enable) {
5822 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5823 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5824 	}
5825 }
5826 
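/* Stop and reset the RLC, set up power and clock gating, load the
 * RLC microcode (new-style header or legacy layout) and start the
 * RLC back up.
 */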
5827 static int si_rlc_resume(struct radeon_device *rdev)
5828 {
5829 	u32 i;
5830 
5831 	if (!rdev->rlc_fw)
5832 		return -EINVAL;
5833 
5834 	si_rlc_stop(rdev);
5835 
5836 	si_rlc_reset(rdev);
5837 
5838 	si_init_pg(rdev);
5839 
5840 	si_init_cg(rdev);
5841 
5842 	WREG32(RLC_RL_BASE, 0);
5843 	WREG32(RLC_RL_SIZE, 0);
5844 	WREG32(RLC_LB_CNTL, 0);
5845 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5846 	WREG32(RLC_LB_CNTR_INIT, 0);
5847 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5848 
5849 	WREG32(RLC_MC_CNTL, 0);
5850 	WREG32(RLC_UCODE_CNTL, 0);
5851 
5852 	if (rdev->new_fw) {
5853 		const struct rlc_firmware_header_v1_0 *hdr =
5854 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5855 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5856 		const __le32 *fw_data = (const __le32 *)
5857 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5858 
5859 		radeon_ucode_print_rlc_hdr(&hdr->header);
5860 
5861 		for (i = 0; i < fw_size; i++) {
5862 			WREG32(RLC_UCODE_ADDR, i);
5863 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5864 		}
5865 	} else {
5866 		const __be32 *fw_data =
5867 			(const __be32 *)rdev->rlc_fw->data;
5868 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5869 			WREG32(RLC_UCODE_ADDR, i);
5870 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5871 		}
5872 	}
5873 	WREG32(RLC_UCODE_ADDR, 0);
5874 
5875 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5876 
5877 	si_rlc_start(rdev);
5878 
5879 	return 0;
5880 }
5881 
5882 static void si_enable_interrupts(struct radeon_device *rdev)
5883 {
5884 	u32 ih_cntl = RREG32(IH_CNTL);
5885 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5886 
5887 	ih_cntl |= ENABLE_INTR;
5888 	ih_rb_cntl |= IH_RB_ENABLE;
5889 	WREG32(IH_CNTL, ih_cntl);
5890 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5891 	rdev->ih.enabled = true;
5892 }
5893 
5894 static void si_disable_interrupts(struct radeon_device *rdev)
5895 {
5896 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5897 	u32 ih_cntl = RREG32(IH_CNTL);
5898 
5899 	ih_rb_cntl &= ~IH_RB_ENABLE;
5900 	ih_cntl &= ~ENABLE_INTR;
5901 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5902 	WREG32(IH_CNTL, ih_cntl);
5903 	/* set rptr, wptr to 0 */
5904 	WREG32(IH_RB_RPTR, 0);
5905 	WREG32(IH_RB_WPTR, 0);
5906 	rdev->ih.enabled = false;
5907 	rdev->ih.rptr = 0;
5908 }
5909 
5910 static void si_disable_interrupt_state(struct radeon_device *rdev)
5911 {
5912 	u32 tmp;
5913 
5914 	tmp = RREG32(CP_INT_CNTL_RING0) &
5915 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5916 	WREG32(CP_INT_CNTL_RING0, tmp);
5917 	WREG32(CP_INT_CNTL_RING1, 0);
5918 	WREG32(CP_INT_CNTL_RING2, 0);
5919 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5920 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5921 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5922 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5923 	WREG32(GRBM_INT_CNTL, 0);
5924 	WREG32(SRBM_INT_CNTL, 0);
5925 	if (rdev->num_crtc >= 2) {
5926 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5927 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5928 	}
5929 	if (rdev->num_crtc >= 4) {
5930 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5931 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5932 	}
5933 	if (rdev->num_crtc >= 6) {
5934 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5935 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5936 	}
5937 
5938 	if (rdev->num_crtc >= 2) {
5939 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5940 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5941 	}
5942 	if (rdev->num_crtc >= 4) {
5943 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5944 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5945 	}
5946 	if (rdev->num_crtc >= 6) {
5947 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5948 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5949 	}
5950 
5951 	if (!ASIC_IS_NODCE(rdev)) {
5952 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5953 
5954 		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5955 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5956 		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5957 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5958 		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5959 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5960 		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5961 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5962 		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5963 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5964 		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5965 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5966 	}
5967 }
5968 
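/* si_irq_init - allocate the IH ring, bring up the RLC, program the
 * interrupt and IH ring buffer control registers (using writeback
 * when enabled) and enable interrupts.  The individual interrupt
 * sources stay masked until si_irq_set() unmasks them.
 */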
5969 static int si_irq_init(struct radeon_device *rdev)
5970 {
5971 	int ret = 0;
5972 	int rb_bufsz;
5973 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5974 
5975 	/* allocate ring */
5976 	ret = r600_ih_ring_alloc(rdev);
5977 	if (ret)
5978 		return ret;
5979 
5980 	/* disable irqs */
5981 	si_disable_interrupts(rdev);
5982 
5983 	/* init rlc */
5984 	ret = si_rlc_resume(rdev);
5985 	if (ret) {
5986 		r600_ih_ring_fini(rdev);
5987 		return ret;
5988 	}
5989 
5990 	/* setup interrupt control */
5991 	/* set dummy read address to ring address */
5992 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5993 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
5994 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with MSI, enabled without MSI
5995 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5996 	 */
5997 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5998 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5999 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6000 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6001 
6002 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6003 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6004 
6005 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6006 		      IH_WPTR_OVERFLOW_CLEAR |
6007 		      (rb_bufsz << 1));
6008 
6009 	if (rdev->wb.enabled)
6010 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6011 
6012 	/* set the writeback address whether it's enabled or not */
6013 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6014 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6015 
6016 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6017 
6018 	/* set rptr, wptr to 0 */
6019 	WREG32(IH_RB_RPTR, 0);
6020 	WREG32(IH_RB_WPTR, 0);
6021 
6022 	/* Default settings for IH_CNTL (disabled at first) */
6023 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6024 	/* RPTR_REARM only works if MSIs are enabled */
6025 	if (rdev->msi_enabled)
6026 		ih_cntl |= RPTR_REARM;
6027 	WREG32(IH_CNTL, ih_cntl);
6028 
6029 	/* force the active interrupt state to all disabled */
6030 	si_disable_interrupt_state(rdev);
6031 
6032 	pci_set_master(rdev->pdev);
6033 
6034 	/* enable irqs */
6035 	si_enable_interrupts(rdev);
6036 
6037 	return ret;
6038 }
6039 
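/* si_irq_set - program the interrupt source enables (CP rings, DMA
 * engines, vblank, hotplug, thermal) according to the current
 * rdev->irq state, finishing with a posting read.
 */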
6040 int si_irq_set(struct radeon_device *rdev)
6041 {
6042 	u32 cp_int_cntl;
6043 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6044 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6045 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
6046 	u32 grbm_int_cntl = 0;
6047 	u32 dma_cntl, dma_cntl1;
6048 	u32 thermal_int = 0;
6049 
6050 	if (!rdev->irq.installed) {
6051 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6052 		return -EINVAL;
6053 	}
6054 	/* don't enable anything if the ih is disabled */
6055 	if (!rdev->ih.enabled) {
6056 		si_disable_interrupts(rdev);
6057 		/* force the active interrupt state to all disabled */
6058 		si_disable_interrupt_state(rdev);
6059 		return 0;
6060 	}
6061 
6062 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6063 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6064 
6065 	if (!ASIC_IS_NODCE(rdev)) {
6066 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6067 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6068 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6069 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6070 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6071 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6072 	}
6073 
6074 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6075 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6076 
6077 	thermal_int = RREG32(CG_THERMAL_INT) &
6078 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6079 
6080 	/* enable CP interrupts on all rings */
6081 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6082 		DRM_DEBUG("si_irq_set: sw int gfx\n");
6083 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6084 	}
6085 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6086 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6087 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6088 	}
6089 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6090 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6091 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6092 	}
6093 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6094 		DRM_DEBUG("si_irq_set: sw int dma\n");
6095 		dma_cntl |= TRAP_ENABLE;
6096 	}
6097 
6098 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6099 		DRM_DEBUG("si_irq_set: sw int dma1\n");
6100 		dma_cntl1 |= TRAP_ENABLE;
6101 	}
6102 	if (rdev->irq.crtc_vblank_int[0] ||
6103 	    atomic_read(&rdev->irq.pflip[0])) {
6104 		DRM_DEBUG("si_irq_set: vblank 0\n");
6105 		crtc1 |= VBLANK_INT_MASK;
6106 	}
6107 	if (rdev->irq.crtc_vblank_int[1] ||
6108 	    atomic_read(&rdev->irq.pflip[1])) {
6109 		DRM_DEBUG("si_irq_set: vblank 1\n");
6110 		crtc2 |= VBLANK_INT_MASK;
6111 	}
6112 	if (rdev->irq.crtc_vblank_int[2] ||
6113 	    atomic_read(&rdev->irq.pflip[2])) {
6114 		DRM_DEBUG("si_irq_set: vblank 2\n");
6115 		crtc3 |= VBLANK_INT_MASK;
6116 	}
6117 	if (rdev->irq.crtc_vblank_int[3] ||
6118 	    atomic_read(&rdev->irq.pflip[3])) {
6119 		DRM_DEBUG("si_irq_set: vblank 3\n");
6120 		crtc4 |= VBLANK_INT_MASK;
6121 	}
6122 	if (rdev->irq.crtc_vblank_int[4] ||
6123 	    atomic_read(&rdev->irq.pflip[4])) {
6124 		DRM_DEBUG("si_irq_set: vblank 4\n");
6125 		crtc5 |= VBLANK_INT_MASK;
6126 	}
6127 	if (rdev->irq.crtc_vblank_int[5] ||
6128 	    atomic_read(&rdev->irq.pflip[5])) {
6129 		DRM_DEBUG("si_irq_set: vblank 5\n");
6130 		crtc6 |= VBLANK_INT_MASK;
6131 	}
6132 	if (rdev->irq.hpd[0]) {
6133 		DRM_DEBUG("si_irq_set: hpd 1\n");
6134 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6135 	}
6136 	if (rdev->irq.hpd[1]) {
6137 		DRM_DEBUG("si_irq_set: hpd 2\n");
6138 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6139 	}
6140 	if (rdev->irq.hpd[2]) {
6141 		DRM_DEBUG("si_irq_set: hpd 3\n");
6142 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6143 	}
6144 	if (rdev->irq.hpd[3]) {
6145 		DRM_DEBUG("si_irq_set: hpd 4\n");
6146 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6147 	}
6148 	if (rdev->irq.hpd[4]) {
6149 		DRM_DEBUG("si_irq_set: hpd 5\n");
6150 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6151 	}
6152 	if (rdev->irq.hpd[5]) {
6153 		DRM_DEBUG("si_irq_set: hpd 6\n");
6154 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6155 	}
6156 
6157 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6158 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6159 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6160 
6161 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6162 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6163 
6164 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6165 
6166 	if (rdev->irq.dpm_thermal) {
6167 		DRM_DEBUG("dpm thermal\n");
6168 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6169 	}
6170 
6171 	if (rdev->num_crtc >= 2) {
6172 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6173 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6174 	}
6175 	if (rdev->num_crtc >= 4) {
6176 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6177 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6178 	}
6179 	if (rdev->num_crtc >= 6) {
6180 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6181 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6182 	}
6183 
6184 	if (rdev->num_crtc >= 2) {
6185 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6186 		       GRPH_PFLIP_INT_MASK);
6187 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6188 		       GRPH_PFLIP_INT_MASK);
6189 	}
6190 	if (rdev->num_crtc >= 4) {
6191 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6192 		       GRPH_PFLIP_INT_MASK);
6193 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6194 		       GRPH_PFLIP_INT_MASK);
6195 	}
6196 	if (rdev->num_crtc >= 6) {
6197 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6198 		       GRPH_PFLIP_INT_MASK);
6199 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6200 		       GRPH_PFLIP_INT_MASK);
6201 	}
6202 
6203 	if (!ASIC_IS_NODCE(rdev)) {
6204 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
6205 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
6206 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
6207 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
6208 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
6209 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
6210 	}
6211 
6212 	WREG32(CG_THERMAL_INT, thermal_int);
6213 
6214 	/* posting read */
6215 	RREG32(SRBM_STATUS);
6216 
6217 	return 0;
6218 }
6219 
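/* Latch the display interrupt status registers into
 * rdev->irq.stat_regs and acknowledge any pending pflip, vblank,
 * vline and HPD/HPD_RX interrupts.
 */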
6220 static inline void si_irq_ack(struct radeon_device *rdev)
6221 {
6222 	u32 tmp;
6223 
6224 	if (ASIC_IS_NODCE(rdev))
6225 		return;
6226 
6227 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6228 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6229 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6230 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6231 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6232 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6233 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6234 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6235 	if (rdev->num_crtc >= 4) {
6236 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6237 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6238 	}
6239 	if (rdev->num_crtc >= 6) {
6240 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6241 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6242 	}
6243 
6244 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6245 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6246 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6247 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6248 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6249 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6250 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6251 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6252 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6253 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6254 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6255 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6256 
6257 	if (rdev->num_crtc >= 4) {
6258 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6259 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6260 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6261 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6262 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6263 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6264 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6265 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6266 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6267 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6268 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6269 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6270 	}
6271 
6272 	if (rdev->num_crtc >= 6) {
6273 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6274 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6275 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6276 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6277 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6278 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6279 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6280 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6281 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6282 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6283 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6284 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6285 	}
6286 
6287 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6288 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6289 		tmp |= DC_HPDx_INT_ACK;
6290 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6291 	}
6292 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6293 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6294 		tmp |= DC_HPDx_INT_ACK;
6295 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6296 	}
6297 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6298 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6299 		tmp |= DC_HPDx_INT_ACK;
6300 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6301 	}
6302 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6303 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6304 		tmp |= DC_HPDx_INT_ACK;
6305 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6306 	}
6307 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6308 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6309 		tmp |= DC_HPDx_INT_ACK;
6310 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6311 	}
6312 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6313 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6314 		tmp |= DC_HPDx_INT_ACK;
6315 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6316 	}
6317 
6318 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
6319 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6320 		tmp |= DC_HPDx_RX_INT_ACK;
6321 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6322 	}
6323 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
6324 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6325 		tmp |= DC_HPDx_RX_INT_ACK;
6326 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6327 	}
6328 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
6329 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6330 		tmp |= DC_HPDx_RX_INT_ACK;
6331 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6332 	}
6333 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
6334 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6335 		tmp |= DC_HPDx_RX_INT_ACK;
6336 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6337 	}
6338 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
6339 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6340 		tmp |= DC_HPDx_RX_INT_ACK;
6341 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6342 	}
6343 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
6344 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6345 		tmp |= DC_HPDx_RX_INT_ACK;
6346 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6347 	}
6348 }
6349 
6350 static void si_irq_disable(struct radeon_device *rdev)
6351 {
6352 	si_disable_interrupts(rdev);
6353 	/* Wait and acknowledge irq */
6354 	mdelay(1);
6355 	si_irq_ack(rdev);
6356 	si_disable_interrupt_state(rdev);
6357 }
6358 
6359 static void si_irq_suspend(struct radeon_device *rdev)
6360 {
6361 	si_irq_disable(rdev);
6362 	si_rlc_stop(rdev);
6363 }
6364 
6365 static void si_irq_fini(struct radeon_device *rdev)
6366 {
6367 	si_irq_suspend(rdev);
6368 	r600_ih_ring_fini(rdev);
6369 }
6370 
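/* Fetch the current IH ring write pointer (from the writeback page
 * when enabled) and handle a ring overflow by skipping ahead to the
 * last vector that was not overwritten.
 */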
6371 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6372 {
6373 	u32 wptr, tmp;
6374 
6375 	if (rdev->wb.enabled)
6376 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6377 	else
6378 		wptr = RREG32(IH_RB_WPTR);
6379 
6380 	if (wptr & RB_OVERFLOW) {
6381 		wptr &= ~RB_OVERFLOW;
6382 		/* When a ring buffer overflow happen start parsing interrupt
6383 		/* When a ring buffer overflow happens, start parsing interrupts
6384 		 * from the last vector that was not overwritten (wptr + 16).
6385 		 * Hopefully this allows us to catch up.
6386 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6387 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6388 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6389 		tmp = RREG32(IH_RB_CNTL);
6390 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6391 		WREG32(IH_RB_CNTL, tmp);
6392 	}
6393 	return (wptr & rdev->ih.ptr_mask);
6394 }
6395 
6396 /* SI IV Ring
6397  * Each IV ring entry is 128 bits:
6398  * [7:0]    - interrupt source id
6399  * [31:8]   - reserved
6400  * [59:32]  - interrupt source data
6401  * [63:60]  - reserved
6402  * [71:64]  - RINGID
6403  * [79:72]  - VMID
6404  * [127:80] - reserved
6405  */
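/* si_irq_process - walk the IH ring from rptr to wptr, decode each
 * 128-bit vector and dispatch it (vblank/vline, page flip, hotplug,
 * VM fault, CP/DMA fence, thermal events), then re-check wptr in
 * case more vectors arrived while processing.
 */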
6406 irqreturn_t si_irq_process(struct radeon_device *rdev)
6407 {
6408 	u32 wptr;
6409 	u32 rptr;
6410 	u32 src_id, src_data, ring_id;
6411 	u32 ring_index;
6412 	bool queue_hotplug = false;
6413 	bool queue_dp = false;
6414 	bool queue_thermal = false;
6415 	u32 status, addr;
6416 
6417 	if (!rdev->ih.enabled || rdev->shutdown)
6418 		return IRQ_NONE;
6419 
6420 	wptr = si_get_ih_wptr(rdev);
6421 
6422 restart_ih:
6423 	/* is somebody else already processing irqs? */
6424 	if (atomic_xchg(&rdev->ih.lock, 1))
6425 		return IRQ_NONE;
6426 
6427 	rptr = rdev->ih.rptr;
6428 	DRM_DEBUG_VBLANK("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6429 
6430 	/* Order reading of wptr vs. reading of IH ring data */
6431 	rmb();
6432 
6433 	/* display interrupts */
6434 	si_irq_ack(rdev);
6435 
6436 	while (rptr != wptr) {
6437 		/* wptr/rptr are in bytes! */
6438 		ring_index = rptr / 4;
6439 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6440 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6441 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6442 
6443 		switch (src_id) {
6444 		case 1: /* D1 vblank/vline */
6445 			switch (src_data) {
6446 			case 0: /* D1 vblank */
6447 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
6448 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6449 
6450 				if (rdev->irq.crtc_vblank_int[0]) {
6451 					drm_handle_vblank(rdev->ddev, 0);
6452 					rdev->pm.vblank_sync = true;
6453 					wake_up(&rdev->irq.vblank_queue);
6454 				}
6455 				if (atomic_read(&rdev->irq.pflip[0]))
6456 					radeon_crtc_handle_vblank(rdev, 0);
6457 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6458 				DRM_DEBUG_VBLANK("IH: D1 vblank\n");
6459 
6460 				break;
6461 			case 1: /* D1 vline */
6462 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
6463 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6464 
6465 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6466 				DRM_DEBUG_VBLANK("IH: D1 vline\n");
6467 
6468 				break;
6469 			default:
6470 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6471 				break;
6472 			}
6473 			break;
6474 		case 2: /* D2 vblank/vline */
6475 			switch (src_data) {
6476 			case 0: /* D2 vblank */
6477 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
6478 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6479 
6480 				if (rdev->irq.crtc_vblank_int[1]) {
6481 					drm_handle_vblank(rdev->ddev, 1);
6482 					rdev->pm.vblank_sync = true;
6483 					wake_up(&rdev->irq.vblank_queue);
6484 				}
6485 				if (atomic_read(&rdev->irq.pflip[1]))
6486 					radeon_crtc_handle_vblank(rdev, 1);
6487 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6488 				DRM_DEBUG_VBLANK("IH: D2 vblank\n");
6489 
6490 				break;
6491 			case 1: /* D2 vline */
6492 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
6493 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6494 
6495 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6496 				DRM_DEBUG_VBLANK("IH: D2 vline\n");
6497 
6498 				break;
6499 			default:
6500 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6501 				break;
6502 			}
6503 			break;
6504 		case 3: /* D3 vblank/vline */
6505 			switch (src_data) {
6506 			case 0: /* D3 vblank */
6507 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
6508 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6509 
6510 				if (rdev->irq.crtc_vblank_int[2]) {
6511 					drm_handle_vblank(rdev->ddev, 2);
6512 					rdev->pm.vblank_sync = true;
6513 					wake_up(&rdev->irq.vblank_queue);
6514 				}
6515 				if (atomic_read(&rdev->irq.pflip[2]))
6516 					radeon_crtc_handle_vblank(rdev, 2);
6517 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6518 				DRM_DEBUG_VBLANK("IH: D3 vblank\n");
6519 
6520 				break;
6521 			case 1: /* D3 vline */
6522 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
6523 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6524 
6525 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6526 				DRM_DEBUG_VBLANK("IH: D3 vline\n");
6527 
6528 				break;
6529 			default:
6530 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6531 				break;
6532 			}
6533 			break;
6534 		case 4: /* D4 vblank/vline */
6535 			switch (src_data) {
6536 			case 0: /* D4 vblank */
6537 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
6538 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6539 
6540 				if (rdev->irq.crtc_vblank_int[3]) {
6541 					drm_handle_vblank(rdev->ddev, 3);
6542 					rdev->pm.vblank_sync = true;
6543 					wake_up(&rdev->irq.vblank_queue);
6544 				}
6545 				if (atomic_read(&rdev->irq.pflip[3]))
6546 					radeon_crtc_handle_vblank(rdev, 3);
6547 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6548 				DRM_DEBUG_VBLANK("IH: D4 vblank\n");
6549 
6550 				break;
6551 			case 1: /* D4 vline */
6552 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
6553 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6554 
6555 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6556 				DRM_DEBUG_VBLANK("IH: D4 vline\n");
6557 
6558 				break;
6559 			default:
6560 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6561 				break;
6562 			}
6563 			break;
6564 		case 5: /* D5 vblank/vline */
6565 			switch (src_data) {
6566 			case 0: /* D5 vblank */
6567 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
6568 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6569 
6570 				if (rdev->irq.crtc_vblank_int[4]) {
6571 					drm_handle_vblank(rdev->ddev, 4);
6572 					rdev->pm.vblank_sync = true;
6573 					wake_up(&rdev->irq.vblank_queue);
6574 				}
6575 				if (atomic_read(&rdev->irq.pflip[4]))
6576 					radeon_crtc_handle_vblank(rdev, 4);
6577 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6578 				DRM_DEBUG_VBLANK("IH: D5 vblank\n");
6579 
6580 				break;
6581 			case 1: /* D5 vline */
6582 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
6583 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6584 
6585 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6586 				DRM_DEBUG_VBLANK("IH: D5 vline\n");
6587 
6588 				break;
6589 			default:
6590 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6591 				break;
6592 			}
6593 			break;
6594 		case 6: /* D6 vblank/vline */
6595 			switch (src_data) {
6596 			case 0: /* D6 vblank */
6597 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
6598 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6599 
6600 				if (rdev->irq.crtc_vblank_int[5]) {
6601 					drm_handle_vblank(rdev->ddev, 5);
6602 					rdev->pm.vblank_sync = true;
6603 					wake_up(&rdev->irq.vblank_queue);
6604 				}
6605 				if (atomic_read(&rdev->irq.pflip[5]))
6606 					radeon_crtc_handle_vblank(rdev, 5);
6607 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6608 				DRM_DEBUG_VBLANK("IH: D6 vblank\n");
6609 
6610 				break;
6611 			case 1: /* D6 vline */
6612 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
6613 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6614 
6615 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6616 				DRM_DEBUG_VBLANK("IH: D6 vline\n");
6617 
6618 				break;
6619 			default:
6620 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6621 				break;
6622 			}
6623 			break;
6624 		case 8: /* D1 page flip */
6625 		case 10: /* D2 page flip */
6626 		case 12: /* D3 page flip */
6627 		case 14: /* D4 page flip */
6628 		case 16: /* D5 page flip */
6629 		case 18: /* D6 page flip */
6630 			DRM_DEBUG_VBLANK("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6631 			if (radeon_use_pflipirq > 0)
6632 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6633 			break;
6634 		case 42: /* HPD hotplug */
6635 			switch (src_data) {
6636 			case 0:
6637 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
6638 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6639 
6640 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6641 				queue_hotplug = true;
6642 				DRM_DEBUG("IH: HPD1\n");
6643 
6644 				break;
6645 			case 1:
6646 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
6647 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6648 
6649 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6650 				queue_hotplug = true;
6651 				DRM_DEBUG("IH: HPD2\n");
6652 
6653 				break;
6654 			case 2:
6655 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
6656 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6657 
6658 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6659 				queue_hotplug = true;
6660 				DRM_DEBUG("IH: HPD3\n");
6661 
6662 				break;
6663 			case 3:
6664 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
6665 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6666 
6667 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6668 				queue_hotplug = true;
6669 				DRM_DEBUG("IH: HPD4\n");
6670 
6671 				break;
6672 			case 4:
6673 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
6674 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6675 
6676 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6677 				queue_hotplug = true;
6678 				DRM_DEBUG("IH: HPD5\n");
6679 
6680 				break;
6681 			case 5:
6682 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
6683 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6684 
6685 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6686 				queue_hotplug = true;
6687 				DRM_DEBUG("IH: HPD6\n");
6688 
6689 				break;
6690 			case 6:
6691 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
6692 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6693 
6694 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
6695 				queue_dp = true;
6696 				DRM_DEBUG("IH: HPD_RX 1\n");
6697 
6698 				break;
6699 			case 7:
6700 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
6701 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6702 
6703 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
6704 				queue_dp = true;
6705 				DRM_DEBUG("IH: HPD_RX 2\n");
6706 
6707 				break;
6708 			case 8:
6709 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
6710 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6711 
6712 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
6713 				queue_dp = true;
6714 				DRM_DEBUG("IH: HPD_RX 3\n");
6715 
6716 				break;
6717 			case 9:
6718 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
6719 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6720 
6721 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
6722 				queue_dp = true;
6723 				DRM_DEBUG("IH: HPD_RX 4\n");
6724 
6725 				break;
6726 			case 10:
6727 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
6728 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6729 
6730 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
6731 				queue_dp = true;
6732 				DRM_DEBUG("IH: HPD_RX 5\n");
6733 
6734 				break;
6735 			case 11:
6736 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
6737 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6738 
6739 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
6740 				queue_dp = true;
6741 				DRM_DEBUG("IH: HPD_RX 6\n");
6742 
6743 				break;
6744 			default:
6745 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6746 				break;
6747 			}
6748 			break;
6749 		case 96:
6750 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6751 			WREG32(SRBM_INT_ACK, 0x1);
6752 			break;
6753 		case 124: /* UVD */
6754 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6755 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6756 			break;
6757 		case 146:
6758 		case 147:
6759 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6760 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6761 			/* reset addr and status */
6762 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6763 			if (addr == 0x0 && status == 0x0)
6764 				break;
6765 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6766 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6767 				addr);
6768 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6769 				status);
6770 			si_vm_decode_fault(rdev, status, addr);
6771 			break;
6772 		case 176: /* RINGID0 CP_INT */
6773 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6774 			break;
6775 		case 177: /* RINGID1 CP_INT */
6776 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6777 			break;
6778 		case 178: /* RINGID2 CP_INT */
6779 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6780 			break;
6781 		case 181: /* CP EOP event */
6782 			DRM_DEBUG("IH: CP EOP\n");
6783 			switch (ring_id) {
6784 			case 0:
6785 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6786 				break;
6787 			case 1:
6788 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6789 				break;
6790 			case 2:
6791 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6792 				break;
6793 			}
6794 			break;
6795 		case 224: /* DMA trap event */
6796 			DRM_DEBUG("IH: DMA trap\n");
6797 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6798 			break;
6799 		case 230: /* thermal low to high */
6800 			DRM_DEBUG("IH: thermal low to high\n");
6801 			rdev->pm.dpm.thermal.high_to_low = false;
6802 			queue_thermal = true;
6803 			break;
6804 		case 231: /* thermal high to low */
6805 			DRM_DEBUG("IH: thermal high to low\n");
6806 			rdev->pm.dpm.thermal.high_to_low = true;
6807 			queue_thermal = true;
6808 			break;
6809 		case 233: /* GUI IDLE */
6810 			DRM_DEBUG("IH: GUI idle\n");
6811 			break;
6812 		case 244: /* DMA1 trap event */
6813 			DRM_DEBUG("IH: DMA1 trap\n");
6814 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6815 			break;
6816 		default:
6817 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6818 			break;
6819 		}
6820 
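		/* each IH ring entry is a 128-bit vector: src_id, src_data,
		 * ring_id plus padding, hence the 16-byte stride below */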
6821 		/* wptr/rptr are in bytes! */
6822 		rptr += 16;
6823 		rptr &= rdev->ih.ptr_mask;
6824 		WREG32(IH_RB_RPTR, rptr);
6825 	}
6826 	if (queue_dp)
6827 		schedule_work(&rdev->dp_work);
6828 	if (queue_hotplug)
6829 		schedule_delayed_work(&rdev->hotplug_work, 0);
6830 	if (queue_thermal && rdev->pm.dpm_enabled)
6831 		schedule_work(&rdev->pm.dpm.thermal.work);
6832 	rdev->ih.rptr = rptr;
6833 	atomic_set(&rdev->ih.lock, 0);
6834 
6835 	/* make sure wptr hasn't changed while processing */
6836 	wptr = si_get_ih_wptr(rdev);
6837 	if (wptr != rptr)
6838 		goto restart_ih;
6839 
6840 	return IRQ_HANDLED;
6841 }
6842 
6843 /*
6844  * startup/shutdown callbacks
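 *
 * Note on the UVD/VCE bring-up split: the *_init() helpers run once
 * from si_init() and only allocate the rings, *_start() runs early in
 * si_startup() to resume the block and start its fence ring, and
 * *_resume() runs after the CP is up to initialize the ring itself.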
6845  */
6846 static void si_uvd_init(struct radeon_device *rdev)
6847 {
6848 	int r;
6849 
6850 	if (!rdev->has_uvd)
6851 		return;
6852 
6853 	r = radeon_uvd_init(rdev);
6854 	if (r) {
6855 		dev_err(rdev->dev, "failed UVD init (%d).\n", r);
6856 		/*
6857 		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
6858 		 * uvd_v2_2_resume() fail early, so nothing happens there
6859 		 * anyway. It is pointless to go through that code path,
6860 		 * hence we disable UVD here.
6861 		 */
6862 		rdev->has_uvd = 0;
6863 		return;
6864 	}
6865 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6866 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6867 }
6868 
6869 static void si_uvd_start(struct radeon_device *rdev)
6870 {
6871 	int r;
6872 
6873 	if (!rdev->has_uvd)
6874 		return;
6875 
6876 	r = uvd_v2_2_resume(rdev);
6877 	if (r) {
6878 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6879 		goto error;
6880 	}
6881 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6882 	if (r) {
6883 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6884 		goto error;
6885 	}
6886 	return;
6887 
6888 error:
6889 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6890 }
6891 
6892 static void si_uvd_resume(struct radeon_device *rdev)
6893 {
6894 	struct radeon_ring *ring;
6895 	int r;
6896 
6897 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6898 		return;
6899 
6900 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6901 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, RADEON_CP_PACKET2);
6902 	if (r) {
6903 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6904 		return;
6905 	}
6906 	r = uvd_v1_0_init(rdev);
6907 	if (r) {
6908 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6909 		return;
6910 	}
6911 }
6912 
6913 static void si_vce_init(struct radeon_device *rdev)
6914 {
6915 	int r;
6916 
6917 	if (!rdev->has_vce)
6918 		return;
6919 
6920 	r = radeon_vce_init(rdev);
6921 	if (r) {
6922 		dev_err(rdev->dev, "failed VCE init (%d).\n", r);
6923 		/*
6924 		 * At this point rdev->vce.vcpu_bo is NULL, which makes
6925 		 * si_vce_start() fail early, so nothing happens there
6926 		 * anyway. It is pointless to go through that code path,
6927 		 * hence we disable VCE here.
6928 		 */
6929 		rdev->has_vce = 0;
6930 		return;
6931 	}
6932 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6933 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6934 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6935 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6936 }
6937 
6938 static void si_vce_start(struct radeon_device *rdev)
6939 {
6940 	int r;
6941 
6942 	if (!rdev->has_vce)
6943 		return;
6944 
6945 	r = radeon_vce_resume(rdev);
6946 	if (r) {
6947 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6948 		goto error;
6949 	}
6950 	r = vce_v1_0_resume(rdev);
6951 	if (r) {
6952 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6953 		goto error;
6954 	}
6955 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6956 	if (r) {
6957 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6958 		goto error;
6959 	}
6960 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6961 	if (r) {
6962 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6963 		goto error;
6964 	}
6965 	return;
6966 
6967 error:
6968 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6969 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6970 }
6971 
6972 static void si_vce_resume(struct radeon_device *rdev)
6973 {
6974 	struct radeon_ring *ring;
6975 	int r;
6976 
6977 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6978 		return;
6979 
6980 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6981 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6982 	if (r) {
6983 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6984 		return;
6985 	}
6986 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6987 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6988 	if (r) {
6989 		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
6990 		return;
6991 	}
6992 	r = vce_v1_0_init(rdev);
6993 	if (r) {
6994 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6995 		return;
6996 	}
6997 }
6998 
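/*
 * Bring-up order below matters: VRAM scratch and MC first, then GART
 * and the GPU core, then write-back and the fence rings, IRQs, the
 * CP/DMA rings, and finally the IB pool, VM manager and audio.
 */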
6999 static int si_startup(struct radeon_device *rdev)
7000 {
7001 	struct radeon_ring *ring;
7002 	int r;
7003 
7004 	/* enable pcie gen2/3 link */
7005 	si_pcie_gen3_enable(rdev);
7006 	/* enable aspm */
7007 	si_program_aspm(rdev);
7008 
7009 	/* scratch needs to be initialized before MC */
7010 	r = r600_vram_scratch_init(rdev);
7011 	if (r)
7012 		return r;
7013 
7014 	si_mc_program(rdev);
7015 
7016 	if (!rdev->pm.dpm_enabled) {
7017 		r = si_mc_load_microcode(rdev);
7018 		if (r) {
7019 			DRM_ERROR("Failed to load MC firmware!\n");
7020 			return r;
7021 		}
7022 	}
7023 
7024 	r = si_pcie_gart_enable(rdev);
7025 	if (r)
7026 		return r;
7027 	si_gpu_init(rdev);
7028 
7029 	/* allocate rlc buffers */
7030 	if (rdev->family == CHIP_VERDE) {
7031 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
7032 		rdev->rlc.reg_list_size =
7033 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
7034 	}
7035 	rdev->rlc.cs_data = si_cs_data;
7036 	r = sumo_rlc_init(rdev);
7037 	if (r) {
7038 		DRM_ERROR("Failed to init rlc BOs!\n");
7039 		return r;
7040 	}
7041 
7042 	/* allocate wb buffer */
7043 	r = radeon_wb_init(rdev);
7044 	if (r)
7045 		return r;
7046 
7047 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7048 	if (r) {
7049 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7050 		return r;
7051 	}
7052 
7053 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7054 	if (r) {
7055 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7056 		return r;
7057 	}
7058 
7059 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7060 	if (r) {
7061 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7062 		return r;
7063 	}
7064 
7065 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7066 	if (r) {
7067 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7068 		return r;
7069 	}
7070 
7071 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7072 	if (r) {
7073 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7074 		return r;
7075 	}
7076 
7077 	si_uvd_start(rdev);
7078 	si_vce_start(rdev);
7079 
7080 	/* Enable IRQ */
7081 	if (!rdev->irq.installed) {
7082 		r = radeon_irq_kms_init(rdev);
7083 		if (r)
7084 			return r;
7085 	}
7086 
7087 	r = si_irq_init(rdev);
7088 	if (r) {
7089 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7090 		radeon_irq_kms_fini(rdev);
7091 		return r;
7092 	}
7093 	si_irq_set(rdev);
7094 
7095 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7096 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7097 			     RADEON_CP_PACKET2);
7098 	if (r)
7099 		return r;
7100 
7101 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7102 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7103 			     RADEON_CP_PACKET2);
7104 	if (r)
7105 		return r;
7106 
7107 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7108 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7109 			     RADEON_CP_PACKET2);
7110 	if (r)
7111 		return r;
7112 
7113 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7114 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7115 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7116 	if (r)
7117 		return r;
7118 
7119 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7120 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7121 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7122 	if (r)
7123 		return r;
7124 
7125 	r = si_cp_load_microcode(rdev);
7126 	if (r)
7127 		return r;
7128 	r = si_cp_resume(rdev);
7129 	if (r)
7130 		return r;
7131 
7132 	r = cayman_dma_resume(rdev);
7133 	if (r)
7134 		return r;
7135 
7136 	si_uvd_resume(rdev);
7137 	si_vce_resume(rdev);
7138 
7139 	r = radeon_ib_pool_init(rdev);
7140 	if (r) {
7141 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7142 		return r;
7143 	}
7144 
7145 	r = radeon_vm_manager_init(rdev);
7146 	if (r) {
7147 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7148 		return r;
7149 	}
7150 
7151 	r = radeon_audio_init(rdev);
7152 	if (r)
7153 		return r;
7154 
7155 	return 0;
7156 }
7157 
7158 int si_resume(struct radeon_device *rdev)
7159 {
7160 	int r;
7161 
7162 	/* Do not reset the GPU before posting; on rv770 and newer hw,
7163 	 * unlike on r500 hw, posting performs the tasks necessary to
7164 	 * bring the GPU back into good shape.
7165 	 */
7166 	/* post card */
7167 	atom_asic_init(rdev->mode_info.atom_context);
7168 
7169 	/* init golden registers */
7170 	si_init_golden_registers(rdev);
7171 
7172 	if (rdev->pm.pm_method == PM_METHOD_DPM)
7173 		radeon_pm_resume(rdev);
7174 
7175 	rdev->accel_working = true;
7176 	r = si_startup(rdev);
7177 	if (r) {
7178 		DRM_ERROR("si startup failed on resume\n");
7179 		rdev->accel_working = false;
7180 		return r;
7181 	}
7182 
7183 	return r;
7184 }
7186 
7187 int si_suspend(struct radeon_device *rdev)
7188 {
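	/* tear down in roughly the reverse order of si_startup() */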
7189 	radeon_pm_suspend(rdev);
7190 	radeon_audio_fini(rdev);
7191 	radeon_vm_manager_fini(rdev);
7192 	si_cp_enable(rdev, false);
7193 	cayman_dma_stop(rdev);
7194 	if (rdev->has_uvd) {
7195 		uvd_v1_0_fini(rdev);
7196 		radeon_uvd_suspend(rdev);
7197 	}
7198 	if (rdev->has_vce)
7199 		radeon_vce_suspend(rdev);
7200 	si_fini_pg(rdev);
7201 	si_fini_cg(rdev);
7202 	si_irq_suspend(rdev);
7203 	radeon_wb_disable(rdev);
7204 	si_pcie_gart_disable(rdev);
7205 	return 0;
7206 }
7207 
7208 /* The plan is to move initialization into this function and use helper
7209  * functions so that radeon_device_init does pretty much nothing more
7210  * than call ASIC-specific functions. This should also allow us to
7211  * remove a bunch of callback functions, such as vram_info.
7212  */
7214 int si_init(struct radeon_device *rdev)
7215 {
7216 	struct radeon_ring *ring;
7217 	int r;
7218 
7219 	/* Read BIOS */
7220 	if (!radeon_get_bios(rdev)) {
7221 		if (ASIC_IS_AVIVO(rdev))
7222 			return -EINVAL;
7223 	}
7224 	/* Must be an ATOMBIOS */
7225 	if (!rdev->is_atom_bios) {
7226 		dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
7227 		return -EINVAL;
7228 	}
7229 	r = radeon_atombios_init(rdev);
7230 	if (r)
7231 		return r;
7232 
7233 	/* Post card if necessary */
7234 	if (!radeon_card_posted(rdev)) {
7235 		if (!rdev->bios) {
7236 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7237 			return -EINVAL;
7238 		}
7239 		DRM_INFO("GPU not posted. posting now...\n");
7240 		atom_asic_init(rdev->mode_info.atom_context);
7241 	}
7242 	/* init golden registers */
7243 	si_init_golden_registers(rdev);
7244 	/* Initialize scratch registers */
7245 	si_scratch_init(rdev);
7246 	/* Initialize surface registers */
7247 	radeon_surface_init(rdev);
7248 	/* Initialize clocks */
7249 	radeon_get_clock_info(rdev->ddev);
7250 
7251 	/* Fence driver */
7252 	r = radeon_fence_driver_init(rdev);
7253 	if (r)
7254 		return r;
7255 
7256 	/* initialize memory controller */
7257 	r = si_mc_init(rdev);
7258 	if (r)
7259 		return r;
7260 	/* Memory manager */
7261 	r = radeon_bo_init(rdev);
7262 	if (r)
7263 		return r;
7264 
7265 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7266 	    !rdev->rlc_fw || !rdev->mc_fw) {
7267 		r = si_init_microcode(rdev);
7268 		if (r) {
7269 			DRM_ERROR("Failed to load firmware!\n");
7270 			return r;
7271 		}
7272 	}
7273 
7274 	/* Initialize power management */
7275 	radeon_pm_init(rdev);
7276 
7277 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7278 	ring->ring_obj = NULL;
7279 	r600_ring_init(rdev, ring, 1024 * 1024);
7280 
7281 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7282 	ring->ring_obj = NULL;
7283 	r600_ring_init(rdev, ring, 1024 * 1024);
7284 
7285 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7286 	ring->ring_obj = NULL;
7287 	r600_ring_init(rdev, ring, 1024 * 1024);
7288 
7289 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7290 	ring->ring_obj = NULL;
7291 	r600_ring_init(rdev, ring, 64 * 1024);
7292 
7293 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7294 	ring->ring_obj = NULL;
7295 	r600_ring_init(rdev, ring, 64 * 1024);
7296 
7297 	si_uvd_init(rdev);
7298 	si_vce_init(rdev);
7299 
7300 	rdev->ih.ring_obj = NULL;
7301 	r600_ih_ring_init(rdev, 64 * 1024);
7302 
7303 	r = r600_pcie_gart_init(rdev);
7304 	if (r)
7305 		return r;
7306 
7307 	rdev->accel_working = true;
7308 	r = si_startup(rdev);
7309 	if (r) {
7310 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7311 		si_cp_fini(rdev);
7312 		cayman_dma_fini(rdev);
7313 		si_irq_fini(rdev);
7314 		sumo_rlc_fini(rdev);
7315 		radeon_wb_fini(rdev);
7316 		radeon_ib_pool_fini(rdev);
7317 		radeon_vm_manager_fini(rdev);
7318 		radeon_irq_kms_fini(rdev);
7319 		si_pcie_gart_fini(rdev);
7320 		rdev->accel_working = false;
7321 	}
7322 
7323 	/* Don't start up if the MC ucode is missing.
7324 	 * The default clocks and voltages before the MC ucode
7325 	 * is loaded are not sufficient for advanced operations.
7326 	 */
7327 	if (!rdev->mc_fw) {
7328 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7329 		return -EINVAL;
7330 	}
7331 
7332 	return 0;
7333 }
7334 
7335 void si_fini(struct radeon_device *rdev)
7336 {
7337 	radeon_pm_fini(rdev);
7338 	si_cp_fini(rdev);
7339 	cayman_dma_fini(rdev);
7340 	si_fini_pg(rdev);
7341 	si_fini_cg(rdev);
7342 	si_irq_fini(rdev);
7343 	sumo_rlc_fini(rdev);
7344 	radeon_wb_fini(rdev);
7345 	radeon_vm_manager_fini(rdev);
7346 	radeon_ib_pool_fini(rdev);
7347 	radeon_irq_kms_fini(rdev);
7348 	if (rdev->has_uvd) {
7349 		uvd_v1_0_fini(rdev);
7350 		radeon_uvd_fini(rdev);
7351 	}
7352 	if (rdev->has_vce)
7353 		radeon_vce_fini(rdev);
7354 	si_pcie_gart_fini(rdev);
7355 	r600_vram_scratch_fini(rdev);
7356 	radeon_gem_fini(rdev);
7357 	radeon_fence_driver_fini(rdev);
7358 	radeon_bo_fini(rdev);
7359 	radeon_atombios_fini(rdev);
7360 	si_fini_microcode(rdev);
7361 	kfree(rdev->bios);
7362 	rdev->bios = NULL;
7363 }
7364 
7365 /**
7366  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7367  *
7368  * @rdev: radeon_device pointer
7369  *
7370  * Fetches a GPU clock counter snapshot (SI).
7371  * Returns the 64 bit clock counter snapshot.
7372  */
7373 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7374 {
7375 	uint64_t clock;
7376 
7377 	mutex_lock(&rdev->gpu_clock_mutex);
7378 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7379 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7380 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7381 	mutex_unlock(&rdev->gpu_clock_mutex);
7382 	return clock;
7383 }
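
/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 *
 *	uint64_t t0 = si_get_gpu_clock_counter(rdev);
 *	... run some GPU work ...
 *	uint64_t t1 = si_get_gpu_clock_counter(rdev);
 *	DRM_DEBUG("elapsed %llu GPU clocks\n", (unsigned long long)(t1 - t0));
 */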
7384 
7385 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7386 {
7387 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7388 	int r;
7389 
7390 	/* bypass vclk and dclk with bclk */
7391 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7392 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7393 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7394 
7395 	/* put PLL in bypass mode */
7396 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7397 
7398 	if (!vclk || !dclk) {
7399 		/* keep the Bypass mode */
7400 		return 0;
7401 	}
7402 
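	/* pick feedback and post dividers; clocks are in 10 kHz units, so
	 * the limits below keep the VCO roughly between 1.25 and 2.5 GHz
	 * (an assumption based on the arguments passed) */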
7403 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7404 					  16384, 0x03FFFFFF, 0, 128, 5,
7405 					  &fb_div, &vclk_div, &dclk_div);
7406 	if (r)
7407 		return r;
7408 
7409 	/* set RESET_ANTI_MUX to 0 */
7410 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7411 
7412 	/* set VCO_MODE to 1 */
7413 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7414 
7415 	/* disable sleep mode */
7416 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7417 
7418 	/* deassert UPLL_RESET */
7419 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7420 
7421 	mdelay(1);
7422 
7423 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7424 	if (r)
7425 		return r;
7426 
7427 	/* assert UPLL_RESET again */
7428 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7429 
7430 	/* disable spread spectrum. */
7431 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7432 
7433 	/* set feedback divider */
7434 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7435 
7436 	/* set ref divider to 0 */
7437 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7438 
7439 	if (fb_div < 307200)
7440 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7441 	else
7442 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7443 
7444 	/* set PDIV_A and PDIV_B */
7445 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7446 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7447 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7448 
7449 	/* give the PLL some time to settle */
7450 	mdelay(15);
7451 
7452 	/* deassert PLL_RESET */
7453 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7454 
7455 	mdelay(15);
7456 
7457 	/* switch from bypass mode to normal mode */
7458 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7459 
7460 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7461 	if (r)
7462 		return r;
7463 
7464 	/* switch VCLK and DCLK selection */
7465 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7466 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7467 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7468 
7469 	mdelay(100);
7470 
7471 	return 0;
7472 }
7473 
7474 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7475 {
7476 	struct pci_dev *root = rdev->pdev->bus->self;
7477 	int bridge_pos, gpu_pos;
7478 	u32 speed_cntl, mask, current_data_rate;
7479 	int ret, i;
7480 	u16 tmp16;
7481 
7482 #if 0
7483 	if (pci_is_root_bus(rdev->pdev->bus))
7484 		return;
7485 #endif
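	/* XXX the root-bus check above is disabled in this port */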
7486 
7487 	if (radeon_pcie_gen2 == 0)
7488 		return;
7489 
7490 	if (rdev->flags & RADEON_IS_IGP)
7491 		return;
7492 
7493 	if (!(rdev->flags & RADEON_IS_PCIE))
7494 		return;
7495 
7496 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7497 	if (ret != 0)
7498 		return;
7499 
7500 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7501 		return;
7502 
7503 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7504 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7505 		LC_CURRENT_DATA_RATE_SHIFT;
7506 	if (mask & DRM_PCIE_SPEED_80) {
7507 		if (current_data_rate == 2) {
7508 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7509 			return;
7510 		}
7511 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7512 	} else if (mask & DRM_PCIE_SPEED_50) {
7513 		if (current_data_rate == 1) {
7514 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7515 			return;
7516 		}
7517 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7518 	}
7519 
7520 	bridge_pos = pci_pcie_cap(root);
7521 	if (!bridge_pos)
7522 		return;
7523 
7524 	gpu_pos = pci_pcie_cap(rdev->pdev);
7525 	if (!gpu_pos)
7526 		return;
7527 
7528 	if (mask & DRM_PCIE_SPEED_80) {
7529 		/* re-try equalization if gen3 is not already enabled */
7530 		if (current_data_rate != 2) {
7531 			u16 bridge_cfg, gpu_cfg;
7532 			u16 bridge_cfg2, gpu_cfg2;
7533 			u32 max_lw, current_lw, tmp;
7534 
7535 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7536 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7537 
7538 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7539 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7540 
7541 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7542 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7543 
7544 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7545 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7546 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7547 
7548 			if (current_lw < max_lw) {
7549 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7550 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7551 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7552 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7553 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7554 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7555 				}
7556 			}
7557 
7558 			for (i = 0; i < 10; i++) {
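			/* retry the retraining sequence up to 10 times:
			 * save LNKCTL/LNKCTL2, quiesce the link and redo
			 * equalization, then restore the saved fields on
			 * both the bridge and the GPU */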
7559 				/* check status */
7560 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7561 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7562 					break;
7563 
7564 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7565 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7566 
7567 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7568 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7569 
7570 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7571 				tmp |= LC_SET_QUIESCE;
7572 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7573 
7574 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7575 				tmp |= LC_REDO_EQ;
7576 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7577 
7578 				mdelay(100);
7579 
7580 				/* linkctl */
7581 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7582 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7583 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7584 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7585 
7586 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7587 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7588 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7589 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7590 
7591 				/* linkctl2: restore bits 4 and 9:11 saved above */
7592 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7593 				tmp16 &= ~((1 << 4) | (7 << 9));
7594 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7595 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7596 
7597 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7598 				tmp16 &= ~((1 << 4) | (7 << 9));
7599 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7600 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7601 
7602 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7603 				tmp &= ~LC_SET_QUIESCE;
7604 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7605 			}
7606 		}
7607 	}
7608 
7609 	/* set the link speed */
7610 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7611 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7612 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7613 
7614 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7615 	tmp16 &= ~0xf;
7616 	if (mask & DRM_PCIE_SPEED_80)
7617 		tmp16 |= 3; /* gen3 */
7618 	else if (mask & DRM_PCIE_SPEED_50)
7619 		tmp16 |= 2; /* gen2 */
7620 	else
7621 		tmp16 |= 1; /* gen1 */
7622 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7623 
7624 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7625 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7626 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7627 
7628 	for (i = 0; i < rdev->usec_timeout; i++) {
7629 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7630 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7631 			break;
7632 		udelay(1);
7633 	}
7634 }
7635 
7636 static void si_program_aspm(struct radeon_device *rdev)
7637 {
7638 	u32 data, orig;
7639 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7640 #if 0
7641 	bool disable_clkreq = false;
7642 #endif
7643 
7644 	if (radeon_aspm == 0)
7645 		return;
7646 
7647 	if (!(rdev->flags & RADEON_IS_PCIE))
7648 		return;
7649 
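	/* the sequence below tunes the N_FTS count, lets the link ignore
	 * EDB errors, then enables the L0s/L1 inactivity timers and, where
	 * allowed, powers down the PHY PLLs while in L1 */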
7650 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7651 	data &= ~LC_XMIT_N_FTS_MASK;
7652 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7653 	if (orig != data)
7654 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7655 
7656 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7657 	data |= LC_GO_TO_RECOVERY;
7658 	if (orig != data)
7659 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7660 
7661 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7662 	data |= P_IGNORE_EDB_ERR;
7663 	if (orig != data)
7664 		WREG32_PCIE(PCIE_P_CNTL, data);
7665 
7666 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7667 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7668 	data |= LC_PMI_TO_L1_DIS;
7669 	if (!disable_l0s)
7670 		data |= LC_L0S_INACTIVITY(7);
7671 
7672 	if (!disable_l1) {
7673 		data |= LC_L1_INACTIVITY(7);
7674 		data &= ~LC_PMI_TO_L1_DIS;
7675 		if (orig != data)
7676 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7677 
7678 		if (!disable_plloff_in_l1) {
7679 			bool clk_req_support;
7680 
7681 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7682 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7683 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7684 			if (orig != data)
7685 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7686 
7687 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7688 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7689 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7690 			if (orig != data)
7691 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7692 
7693 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7694 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7695 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7696 			if (orig != data)
7697 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7698 
7699 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7700 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7701 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7702 			if (orig != data)
7703 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7704 
7705 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7706 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7707 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7708 				if (orig != data)
7709 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7710 
7711 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7712 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7713 				if (orig != data)
7714 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7715 
7716 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7717 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7718 				if (orig != data)
7719 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7720 
7721 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7722 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7723 				if (orig != data)
7724 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7725 
7726 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7727 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7728 				if (orig != data)
7729 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7730 
7731 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7732 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7733 				if (orig != data)
7734 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7735 
7736 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7737 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7738 				if (orig != data)
7739 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7740 
7741 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7742 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7743 				if (orig != data)
7744 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7745 			}
7746 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7747 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7748 			data |= LC_DYN_LANES_PWR_STATE(3);
7749 			if (orig != data)
7750 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7751 
7752 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7753 			data &= ~LS2_EXIT_TIME_MASK;
7754 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7755 				data |= LS2_EXIT_TIME(5);
7756 			if (orig != data)
7757 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7758 
7759 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7760 			data &= ~LS2_EXIT_TIME_MASK;
7761 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7762 				data |= LS2_EXIT_TIME(5);
7763 			if (orig != data)
7764 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7765 
7766 #ifdef zMN_TODO
7767 			if (!disable_clkreq &&
7768 			    !pci_is_root_bus(rdev->pdev->bus)) {
7769 				struct pci_dev *root = rdev->pdev->bus->self;
7770 				u32 lnkcap;
7771 
7772 				clk_req_support = false;
7773 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7774 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7775 					clk_req_support = true;
7776 			} else {
7777 				clk_req_support = false;
7778 			}
7779 #else
7780 			clk_req_support = false;
7781 #endif
7782 
7783 			if (clk_req_support) {
7784 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7785 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7786 				if (orig != data)
7787 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7788 
7789 				orig = data = RREG32(THM_CLK_CNTL);
7790 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7791 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7792 				if (orig != data)
7793 					WREG32(THM_CLK_CNTL, data);
7794 
7795 				orig = data = RREG32(MISC_CLK_CNTL);
7796 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7797 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7798 				if (orig != data)
7799 					WREG32(MISC_CLK_CNTL, data);
7800 
7801 				orig = data = RREG32(CG_CLKPIN_CNTL);
7802 				data &= ~BCLK_AS_XCLK;
7803 				if (orig != data)
7804 					WREG32(CG_CLKPIN_CNTL, data);
7805 
7806 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7807 				data &= ~FORCE_BIF_REFCLK_EN;
7808 				if (orig != data)
7809 					WREG32(CG_CLKPIN_CNTL_2, data);
7810 
7811 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7812 				data &= ~MPLL_CLKOUT_SEL_MASK;
7813 				data |= MPLL_CLKOUT_SEL(4);
7814 				if (orig != data)
7815 					WREG32(MPLL_BYPASSCLK_SEL, data);
7816 
7817 				orig = data = RREG32(SPLL_CNTL_MODE);
7818 				data &= ~SPLL_REFCLK_SEL_MASK;
7819 				if (orig != data)
7820 					WREG32(SPLL_CNTL_MODE, data);
7821 			}
7822 		}
7823 	} else {
7824 		if (orig != data)
7825 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7826 	}
7827 
7828 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7829 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7830 	if (orig != data)
7831 		WREG32_PCIE(PCIE_CNTL2, data);
7832 
7833 	if (!disable_l0s) {
7834 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7835 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7836 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7837 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7838 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7839 				data &= ~LC_L0S_INACTIVITY_MASK;
7840 				if (orig != data)
7841 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7842 			}
7843 		}
7844 	}
7845 }
7846 
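/*
 * Request/ack handshake with the VCEPLL control logic: assert CTLREQ,
 * poll until both CTLACK bits are set, then deassert CTLREQ again.
 */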
7847 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7848 {
7849 	unsigned i;
7850 
7851 	/* make sure VCEPLL_CTLREQ is deasserted */
7852 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7853 
7854 	mdelay(10);
7855 
7856 	/* assert UPLL_CTLREQ */
7857 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7858 
7859 	/* wait for CTLACK and CTLACK2 to get asserted */
7860 	for (i = 0; i < 100; ++i) {
7861 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7862 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7863 			break;
7864 		mdelay(10);
7865 	}
7866 
7867 	/* deassert UPLL_CTLREQ */
7868 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7869 
7870 	if (i == 100) {
7871 		DRM_ERROR("Timeout setting VCE clocks!\n");
7872 		return -ETIMEDOUT;
7873 	}
7874 
7875 	return 0;
7876 }
7877 
7878 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7879 {
7880 	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7881 	int r;
7882 
7883 	/* bypass evclk and ecclk with bclk */
7884 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7885 		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7886 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7887 
7888 	/* put PLL in bypass mode */
7889 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7890 		     ~VCEPLL_BYPASS_EN_MASK);
7891 
7892 	if (!evclk || !ecclk) {
7893 		/* keep the Bypass mode, put PLL to sleep */
7894 		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7895 			     ~VCEPLL_SLEEP_MASK);
7896 		return 0;
7897 	}
7898 
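	/* reuse the UVD divider search for the VCE PLL; same 10 kHz units
	 * and VCO limits as in si_set_uvd_clocks() */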
7899 	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7900 					  16384, 0x03FFFFFF, 0, 128, 5,
7901 					  &fb_div, &evclk_div, &ecclk_div);
7902 	if (r)
7903 		return r;
7904 
7905 	/* set RESET_ANTI_MUX to 0 */
7906 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7907 
7908 	/* set VCO_MODE to 1 */
7909 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7910 		     ~VCEPLL_VCO_MODE_MASK);
7911 
7912 	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7913 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7914 		     ~VCEPLL_SLEEP_MASK);
7915 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7916 
7917 	/* deassert VCEPLL_RESET */
7918 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7919 
7920 	mdelay(1);
7921 
7922 	r = si_vce_send_vcepll_ctlreq(rdev);
7923 	if (r)
7924 		return r;
7925 
7926 	/* assert VCEPLL_RESET again */
7927 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7928 
7929 	/* disable spread spectrum. */
7930 	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7931 
7932 	/* set feedback divider */
7933 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7934 
7935 	/* set ref divider to 0 */
7936 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7937 
7938 	/* set PDIV_A and PDIV_B */
7939 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7940 		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7941 		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7942 
7943 	/* give the PLL some time to settle */
7944 	mdelay(15);
7945 
7946 	/* deassert PLL_RESET */
7947 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7948 
7949 	mdelay(15);
7950 
7951 	/* switch from bypass mode to normal mode */
7952 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7953 
7954 	r = si_vce_send_vcepll_ctlreq(rdev);
7955 	if (r)
7956 		return r;
7957 
7958 	/* switch EVCLK and ECCLK selection */
7959 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7960 		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7961 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7962 
7963 	mdelay(100);
7964 
7965 	return 0;
7966 }
7967