xref: /dragonfly/sys/dev/drm/radeon/si.c (revision edf2e657)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include "radeon_audio.h"
30 #include <uapi_drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36 
37 
38 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
39 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
45 
46 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
47 MODULE_FIRMWARE("radeon/tahiti_me.bin");
48 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
49 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
50 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
51 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
52 
53 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
54 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
55 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
56 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
57 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
58 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
59 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
60 
61 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
62 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
63 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
64 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
65 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
66 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
67 
68 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
69 MODULE_FIRMWARE("radeon/VERDE_me.bin");
70 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
71 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
72 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
73 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
74 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
75 
76 MODULE_FIRMWARE("radeon/verde_pfp.bin");
77 MODULE_FIRMWARE("radeon/verde_me.bin");
78 MODULE_FIRMWARE("radeon/verde_ce.bin");
79 MODULE_FIRMWARE("radeon/verde_mc.bin");
80 MODULE_FIRMWARE("radeon/verde_rlc.bin");
81 MODULE_FIRMWARE("radeon/verde_smc.bin");
82 
83 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
84 MODULE_FIRMWARE("radeon/OLAND_me.bin");
85 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
86 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
87 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
88 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
89 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
90 
91 MODULE_FIRMWARE("radeon/oland_pfp.bin");
92 MODULE_FIRMWARE("radeon/oland_me.bin");
93 MODULE_FIRMWARE("radeon/oland_ce.bin");
94 MODULE_FIRMWARE("radeon/oland_mc.bin");
95 MODULE_FIRMWARE("radeon/oland_rlc.bin");
96 MODULE_FIRMWARE("radeon/oland_smc.bin");
97 
98 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
99 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
100 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
101 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
102 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
103 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
104 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
105 
106 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
107 MODULE_FIRMWARE("radeon/hainan_me.bin");
108 MODULE_FIRMWARE("radeon/hainan_ce.bin");
109 MODULE_FIRMWARE("radeon/hainan_mc.bin");
110 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
111 MODULE_FIRMWARE("radeon/hainan_smc.bin");
112 
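/*
 * The lower-case firmware names are the newer, header-carrying blobs that
 * radeon_ucode_validate() can inspect; the upper-case names are the legacy
 * images.  si_init_microcode() below requests the new name first and falls
 * back to the legacy one.
 */
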
113 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
114 static void si_pcie_gen3_enable(struct radeon_device *rdev);
115 static void si_program_aspm(struct radeon_device *rdev);
116 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
117 					 bool enable);
118 static void si_init_pg(struct radeon_device *rdev);
119 static void si_init_cg(struct radeon_device *rdev);
120 static void si_fini_pg(struct radeon_device *rdev);
121 static void si_fini_cg(struct radeon_device *rdev);
122 static void si_rlc_stop(struct radeon_device *rdev);
123 
124 int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev);
125 
126 static const u32 verde_rlc_save_restore_register_list[] =
127 {
128 	(0x8000 << 16) | (0x98f4 >> 2),
129 	0x00000000,
130 	(0x8040 << 16) | (0x98f4 >> 2),
131 	0x00000000,
132 	(0x8000 << 16) | (0xe80 >> 2),
133 	0x00000000,
134 	(0x8040 << 16) | (0xe80 >> 2),
135 	0x00000000,
136 	(0x8000 << 16) | (0x89bc >> 2),
137 	0x00000000,
138 	(0x8040 << 16) | (0x89bc >> 2),
139 	0x00000000,
140 	(0x8000 << 16) | (0x8c1c >> 2),
141 	0x00000000,
142 	(0x8040 << 16) | (0x8c1c >> 2),
143 	0x00000000,
144 	(0x9c00 << 16) | (0x98f0 >> 2),
145 	0x00000000,
146 	(0x9c00 << 16) | (0xe7c >> 2),
147 	0x00000000,
148 	(0x8000 << 16) | (0x9148 >> 2),
149 	0x00000000,
150 	(0x8040 << 16) | (0x9148 >> 2),
151 	0x00000000,
152 	(0x9c00 << 16) | (0x9150 >> 2),
153 	0x00000000,
154 	(0x9c00 << 16) | (0x897c >> 2),
155 	0x00000000,
156 	(0x9c00 << 16) | (0x8d8c >> 2),
157 	0x00000000,
158 	(0x9c00 << 16) | (0xac54 >> 2),
159 	0x00000000,
160 	0x3,
161 	(0x9c00 << 16) | (0x98f8 >> 2),
162 	0x00000000,
163 	(0x9c00 << 16) | (0x9910 >> 2),
164 	0x00000000,
165 	(0x9c00 << 16) | (0x9914 >> 2),
166 	0x00000000,
167 	(0x9c00 << 16) | (0x9918 >> 2),
168 	0x00000000,
169 	(0x9c00 << 16) | (0x991c >> 2),
170 	0x00000000,
171 	(0x9c00 << 16) | (0x9920 >> 2),
172 	0x00000000,
173 	(0x9c00 << 16) | (0x9924 >> 2),
174 	0x00000000,
175 	(0x9c00 << 16) | (0x9928 >> 2),
176 	0x00000000,
177 	(0x9c00 << 16) | (0x992c >> 2),
178 	0x00000000,
179 	(0x9c00 << 16) | (0x9930 >> 2),
180 	0x00000000,
181 	(0x9c00 << 16) | (0x9934 >> 2),
182 	0x00000000,
183 	(0x9c00 << 16) | (0x9938 >> 2),
184 	0x00000000,
185 	(0x9c00 << 16) | (0x993c >> 2),
186 	0x00000000,
187 	(0x9c00 << 16) | (0x9940 >> 2),
188 	0x00000000,
189 	(0x9c00 << 16) | (0x9944 >> 2),
190 	0x00000000,
191 	(0x9c00 << 16) | (0x9948 >> 2),
192 	0x00000000,
193 	(0x9c00 << 16) | (0x994c >> 2),
194 	0x00000000,
195 	(0x9c00 << 16) | (0x9950 >> 2),
196 	0x00000000,
197 	(0x9c00 << 16) | (0x9954 >> 2),
198 	0x00000000,
199 	(0x9c00 << 16) | (0x9958 >> 2),
200 	0x00000000,
201 	(0x9c00 << 16) | (0x995c >> 2),
202 	0x00000000,
203 	(0x9c00 << 16) | (0x9960 >> 2),
204 	0x00000000,
205 	(0x9c00 << 16) | (0x9964 >> 2),
206 	0x00000000,
207 	(0x9c00 << 16) | (0x9968 >> 2),
208 	0x00000000,
209 	(0x9c00 << 16) | (0x996c >> 2),
210 	0x00000000,
211 	(0x9c00 << 16) | (0x9970 >> 2),
212 	0x00000000,
213 	(0x9c00 << 16) | (0x9974 >> 2),
214 	0x00000000,
215 	(0x9c00 << 16) | (0x9978 >> 2),
216 	0x00000000,
217 	(0x9c00 << 16) | (0x997c >> 2),
218 	0x00000000,
219 	(0x9c00 << 16) | (0x9980 >> 2),
220 	0x00000000,
221 	(0x9c00 << 16) | (0x9984 >> 2),
222 	0x00000000,
223 	(0x9c00 << 16) | (0x9988 >> 2),
224 	0x00000000,
225 	(0x9c00 << 16) | (0x998c >> 2),
226 	0x00000000,
227 	(0x9c00 << 16) | (0x8c00 >> 2),
228 	0x00000000,
229 	(0x9c00 << 16) | (0x8c14 >> 2),
230 	0x00000000,
231 	(0x9c00 << 16) | (0x8c04 >> 2),
232 	0x00000000,
233 	(0x9c00 << 16) | (0x8c08 >> 2),
234 	0x00000000,
235 	(0x8000 << 16) | (0x9b7c >> 2),
236 	0x00000000,
237 	(0x8040 << 16) | (0x9b7c >> 2),
238 	0x00000000,
239 	(0x8000 << 16) | (0xe84 >> 2),
240 	0x00000000,
241 	(0x8040 << 16) | (0xe84 >> 2),
242 	0x00000000,
243 	(0x8000 << 16) | (0x89c0 >> 2),
244 	0x00000000,
245 	(0x8040 << 16) | (0x89c0 >> 2),
246 	0x00000000,
247 	(0x8000 << 16) | (0x914c >> 2),
248 	0x00000000,
249 	(0x8040 << 16) | (0x914c >> 2),
250 	0x00000000,
251 	(0x8000 << 16) | (0x8c20 >> 2),
252 	0x00000000,
253 	(0x8040 << 16) | (0x8c20 >> 2),
254 	0x00000000,
255 	(0x8000 << 16) | (0x9354 >> 2),
256 	0x00000000,
257 	(0x8040 << 16) | (0x9354 >> 2),
258 	0x00000000,
259 	(0x9c00 << 16) | (0x9060 >> 2),
260 	0x00000000,
261 	(0x9c00 << 16) | (0x9364 >> 2),
262 	0x00000000,
263 	(0x9c00 << 16) | (0x9100 >> 2),
264 	0x00000000,
265 	(0x9c00 << 16) | (0x913c >> 2),
266 	0x00000000,
267 	(0x8000 << 16) | (0x90e0 >> 2),
268 	0x00000000,
269 	(0x8000 << 16) | (0x90e4 >> 2),
270 	0x00000000,
271 	(0x8000 << 16) | (0x90e8 >> 2),
272 	0x00000000,
273 	(0x8040 << 16) | (0x90e0 >> 2),
274 	0x00000000,
275 	(0x8040 << 16) | (0x90e4 >> 2),
276 	0x00000000,
277 	(0x8040 << 16) | (0x90e8 >> 2),
278 	0x00000000,
279 	(0x9c00 << 16) | (0x8bcc >> 2),
280 	0x00000000,
281 	(0x9c00 << 16) | (0x8b24 >> 2),
282 	0x00000000,
283 	(0x9c00 << 16) | (0x88c4 >> 2),
284 	0x00000000,
285 	(0x9c00 << 16) | (0x8e50 >> 2),
286 	0x00000000,
287 	(0x9c00 << 16) | (0x8c0c >> 2),
288 	0x00000000,
289 	(0x9c00 << 16) | (0x8e58 >> 2),
290 	0x00000000,
291 	(0x9c00 << 16) | (0x8e5c >> 2),
292 	0x00000000,
293 	(0x9c00 << 16) | (0x9508 >> 2),
294 	0x00000000,
295 	(0x9c00 << 16) | (0x950c >> 2),
296 	0x00000000,
297 	(0x9c00 << 16) | (0x9494 >> 2),
298 	0x00000000,
299 	(0x9c00 << 16) | (0xac0c >> 2),
300 	0x00000000,
301 	(0x9c00 << 16) | (0xac10 >> 2),
302 	0x00000000,
303 	(0x9c00 << 16) | (0xac14 >> 2),
304 	0x00000000,
305 	(0x9c00 << 16) | (0xae00 >> 2),
306 	0x00000000,
307 	(0x9c00 << 16) | (0xac08 >> 2),
308 	0x00000000,
309 	(0x9c00 << 16) | (0x88d4 >> 2),
310 	0x00000000,
311 	(0x9c00 << 16) | (0x88c8 >> 2),
312 	0x00000000,
313 	(0x9c00 << 16) | (0x88cc >> 2),
314 	0x00000000,
315 	(0x9c00 << 16) | (0x89b0 >> 2),
316 	0x00000000,
317 	(0x9c00 << 16) | (0x8b10 >> 2),
318 	0x00000000,
319 	(0x9c00 << 16) | (0x8a14 >> 2),
320 	0x00000000,
321 	(0x9c00 << 16) | (0x9830 >> 2),
322 	0x00000000,
323 	(0x9c00 << 16) | (0x9834 >> 2),
324 	0x00000000,
325 	(0x9c00 << 16) | (0x9838 >> 2),
326 	0x00000000,
327 	(0x9c00 << 16) | (0x9a10 >> 2),
328 	0x00000000,
329 	(0x8000 << 16) | (0x9870 >> 2),
330 	0x00000000,
331 	(0x8000 << 16) | (0x9874 >> 2),
332 	0x00000000,
333 	(0x8001 << 16) | (0x9870 >> 2),
334 	0x00000000,
335 	(0x8001 << 16) | (0x9874 >> 2),
336 	0x00000000,
337 	(0x8040 << 16) | (0x9870 >> 2),
338 	0x00000000,
339 	(0x8040 << 16) | (0x9874 >> 2),
340 	0x00000000,
341 	(0x8041 << 16) | (0x9870 >> 2),
342 	0x00000000,
343 	(0x8041 << 16) | (0x9874 >> 2),
344 	0x00000000,
345 	0x00000000
346 };
347 
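/*
 * The "golden" tables below are { register offset, and-mask, or-value }
 * triplets.  radeon_program_register_sequence() consumes them roughly as in
 * this sketch (see radeon_device.c for the authoritative loop):
 *
 *	for (i = 0; i < array_size; i += 3) {
 *		reg = registers[i + 0];
 *		and_mask = registers[i + 1];
 *		or_mask = registers[i + 2];
 *		tmp = (and_mask == 0xffffffff) ? or_mask :
 *		      (RREG32(reg) & ~and_mask) | or_mask;
 *		WREG32(reg, tmp);
 *	}
 */
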
348 static const u32 tahiti_golden_rlc_registers[] =
349 {
350 	0xc424, 0xffffffff, 0x00601005,
351 	0xc47c, 0xffffffff, 0x10104040,
352 	0xc488, 0xffffffff, 0x0100000a,
353 	0xc314, 0xffffffff, 0x00000800,
354 	0xc30c, 0xffffffff, 0x800000f4,
355 	0xf4a8, 0xffffffff, 0x00000000
356 };
357 
358 static const u32 tahiti_golden_registers[] =
359 {
360 	0x9a10, 0x00010000, 0x00018208,
361 	0x9830, 0xffffffff, 0x00000000,
362 	0x9834, 0xf00fffff, 0x00000400,
363 	0x9838, 0x0002021c, 0x00020200,
364 	0xc78, 0x00000080, 0x00000000,
365 	0xd030, 0x000300c0, 0x00800040,
366 	0xd830, 0x000300c0, 0x00800040,
367 	0x5bb0, 0x000000f0, 0x00000070,
368 	0x5bc0, 0x00200000, 0x50100000,
369 	0x7030, 0x31000311, 0x00000011,
370 	0x277c, 0x00000003, 0x000007ff,
371 	0x240c, 0x000007ff, 0x00000000,
372 	0x8a14, 0xf000001f, 0x00000007,
373 	0x8b24, 0xffffffff, 0x00ffffff,
374 	0x8b10, 0x0000ff0f, 0x00000000,
375 	0x28a4c, 0x07ffffff, 0x4e000000,
376 	0x28350, 0x3f3f3fff, 0x2a00126a,
377 	0x30, 0x000000ff, 0x0040,
378 	0x34, 0x00000040, 0x00004040,
379 	0x9100, 0x07ffffff, 0x03000000,
380 	0x8e88, 0x01ff1f3f, 0x00000000,
381 	0x8e84, 0x01ff1f3f, 0x00000000,
382 	0x9060, 0x0000007f, 0x00000020,
383 	0x9508, 0x00010000, 0x00010000,
384 	0xac14, 0x00000200, 0x000002fb,
385 	0xac10, 0xffffffff, 0x0000543b,
386 	0xac0c, 0xffffffff, 0xa9210876,
387 	0x88d0, 0xffffffff, 0x000fff40,
388 	0x88d4, 0x0000001f, 0x00000010,
389 	0x1410, 0x20000000, 0x20fffed8,
390 	0x15c0, 0x000c0fc0, 0x000c0400
391 };
392 
393 static const u32 tahiti_golden_registers2[] =
394 {
395 	0xc64, 0x00000001, 0x00000001
396 };
397 
398 static const u32 pitcairn_golden_rlc_registers[] =
399 {
400 	0xc424, 0xffffffff, 0x00601004,
401 	0xc47c, 0xffffffff, 0x10102020,
402 	0xc488, 0xffffffff, 0x01000020,
403 	0xc314, 0xffffffff, 0x00000800,
404 	0xc30c, 0xffffffff, 0x800000a4
405 };
406 
407 static const u32 pitcairn_golden_registers[] =
408 {
409 	0x9a10, 0x00010000, 0x00018208,
410 	0x9830, 0xffffffff, 0x00000000,
411 	0x9834, 0xf00fffff, 0x00000400,
412 	0x9838, 0x0002021c, 0x00020200,
413 	0xc78, 0x00000080, 0x00000000,
414 	0xd030, 0x000300c0, 0x00800040,
415 	0xd830, 0x000300c0, 0x00800040,
416 	0x5bb0, 0x000000f0, 0x00000070,
417 	0x5bc0, 0x00200000, 0x50100000,
418 	0x7030, 0x31000311, 0x00000011,
419 	0x2ae4, 0x00073ffe, 0x000022a2,
420 	0x240c, 0x000007ff, 0x00000000,
421 	0x8a14, 0xf000001f, 0x00000007,
422 	0x8b24, 0xffffffff, 0x00ffffff,
423 	0x8b10, 0x0000ff0f, 0x00000000,
424 	0x28a4c, 0x07ffffff, 0x4e000000,
425 	0x28350, 0x3f3f3fff, 0x2a00126a,
426 	0x30, 0x000000ff, 0x0040,
427 	0x34, 0x00000040, 0x00004040,
428 	0x9100, 0x07ffffff, 0x03000000,
429 	0x9060, 0x0000007f, 0x00000020,
430 	0x9508, 0x00010000, 0x00010000,
431 	0xac14, 0x000003ff, 0x000000f7,
432 	0xac10, 0xffffffff, 0x00000000,
433 	0xac0c, 0xffffffff, 0x32761054,
434 	0x88d4, 0x0000001f, 0x00000010,
435 	0x15c0, 0x000c0fc0, 0x000c0400
436 };
437 
438 static const u32 verde_golden_rlc_registers[] =
439 {
440 	0xc424, 0xffffffff, 0x033f1005,
441 	0xc47c, 0xffffffff, 0x10808020,
442 	0xc488, 0xffffffff, 0x00800008,
443 	0xc314, 0xffffffff, 0x00001000,
444 	0xc30c, 0xffffffff, 0x80010014
445 };
446 
447 static const u32 verde_golden_registers[] =
448 {
449 	0x9a10, 0x00010000, 0x00018208,
450 	0x9830, 0xffffffff, 0x00000000,
451 	0x9834, 0xf00fffff, 0x00000400,
452 	0x9838, 0x0002021c, 0x00020200,
453 	0xc78, 0x00000080, 0x00000000,
454 	0xd030, 0x000300c0, 0x00800040,
455 	0xd030, 0x000300c0, 0x00800040,
456 	0xd830, 0x000300c0, 0x00800040,
457 	0xd830, 0x000300c0, 0x00800040,
458 	0x5bb0, 0x000000f0, 0x00000070,
459 	0x5bc0, 0x00200000, 0x50100000,
460 	0x7030, 0x31000311, 0x00000011,
461 	0x2ae4, 0x00073ffe, 0x000022a2,
462 	0x2ae4, 0x00073ffe, 0x000022a2,
463 	0x2ae4, 0x00073ffe, 0x000022a2,
464 	0x240c, 0x000007ff, 0x00000000,
465 	0x240c, 0x000007ff, 0x00000000,
466 	0x240c, 0x000007ff, 0x00000000,
467 	0x8a14, 0xf000001f, 0x00000007,
468 	0x8a14, 0xf000001f, 0x00000007,
469 	0x8a14, 0xf000001f, 0x00000007,
470 	0x8b24, 0xffffffff, 0x00ffffff,
471 	0x8b10, 0x0000ff0f, 0x00000000,
472 	0x28a4c, 0x07ffffff, 0x4e000000,
473 	0x28350, 0x3f3f3fff, 0x0000124a,
474 	0x28350, 0x3f3f3fff, 0x0000124a,
475 	0x28350, 0x3f3f3fff, 0x0000124a,
476 	0x30, 0x000000ff, 0x0040,
477 	0x34, 0x00000040, 0x00004040,
478 	0x9100, 0x07ffffff, 0x03000000,
479 	0x9100, 0x07ffffff, 0x03000000,
480 	0x8e88, 0x01ff1f3f, 0x00000000,
481 	0x8e88, 0x01ff1f3f, 0x00000000,
482 	0x8e88, 0x01ff1f3f, 0x00000000,
483 	0x8e84, 0x01ff1f3f, 0x00000000,
484 	0x8e84, 0x01ff1f3f, 0x00000000,
485 	0x8e84, 0x01ff1f3f, 0x00000000,
486 	0x9060, 0x0000007f, 0x00000020,
487 	0x9508, 0x00010000, 0x00010000,
488 	0xac14, 0x000003ff, 0x00000003,
489 	0xac14, 0x000003ff, 0x00000003,
490 	0xac14, 0x000003ff, 0x00000003,
491 	0xac10, 0xffffffff, 0x00000000,
492 	0xac10, 0xffffffff, 0x00000000,
493 	0xac10, 0xffffffff, 0x00000000,
494 	0xac0c, 0xffffffff, 0x00001032,
495 	0xac0c, 0xffffffff, 0x00001032,
496 	0xac0c, 0xffffffff, 0x00001032,
497 	0x88d4, 0x0000001f, 0x00000010,
498 	0x88d4, 0x0000001f, 0x00000010,
499 	0x88d4, 0x0000001f, 0x00000010,
500 	0x15c0, 0x000c0fc0, 0x000c0400
501 };
502 
503 static const u32 oland_golden_rlc_registers[] =
504 {
505 	0xc424, 0xffffffff, 0x00601005,
506 	0xc47c, 0xffffffff, 0x10104040,
507 	0xc488, 0xffffffff, 0x0100000a,
508 	0xc314, 0xffffffff, 0x00000800,
509 	0xc30c, 0xffffffff, 0x800000f4
510 };
511 
512 static const u32 oland_golden_registers[] =
513 {
514 	0x9a10, 0x00010000, 0x00018208,
515 	0x9830, 0xffffffff, 0x00000000,
516 	0x9834, 0xf00fffff, 0x00000400,
517 	0x9838, 0x0002021c, 0x00020200,
518 	0xc78, 0x00000080, 0x00000000,
519 	0xd030, 0x000300c0, 0x00800040,
520 	0xd830, 0x000300c0, 0x00800040,
521 	0x5bb0, 0x000000f0, 0x00000070,
522 	0x5bc0, 0x00200000, 0x50100000,
523 	0x7030, 0x31000311, 0x00000011,
524 	0x2ae4, 0x00073ffe, 0x000022a2,
525 	0x240c, 0x000007ff, 0x00000000,
526 	0x8a14, 0xf000001f, 0x00000007,
527 	0x8b24, 0xffffffff, 0x00ffffff,
528 	0x8b10, 0x0000ff0f, 0x00000000,
529 	0x28a4c, 0x07ffffff, 0x4e000000,
530 	0x28350, 0x3f3f3fff, 0x00000082,
531 	0x30, 0x000000ff, 0x0040,
532 	0x34, 0x00000040, 0x00004040,
533 	0x9100, 0x07ffffff, 0x03000000,
534 	0x9060, 0x0000007f, 0x00000020,
535 	0x9508, 0x00010000, 0x00010000,
536 	0xac14, 0x000003ff, 0x000000f3,
537 	0xac10, 0xffffffff, 0x00000000,
538 	0xac0c, 0xffffffff, 0x00003210,
539 	0x88d4, 0x0000001f, 0x00000010,
540 	0x15c0, 0x000c0fc0, 0x000c0400
541 };
542 
543 static const u32 hainan_golden_registers[] =
544 {
545 	0x9a10, 0x00010000, 0x00018208,
546 	0x9830, 0xffffffff, 0x00000000,
547 	0x9834, 0xf00fffff, 0x00000400,
548 	0x9838, 0x0002021c, 0x00020200,
549 	0xd0c0, 0xff000fff, 0x00000100,
550 	0xd030, 0x000300c0, 0x00800040,
551 	0xd8c0, 0xff000fff, 0x00000100,
552 	0xd830, 0x000300c0, 0x00800040,
553 	0x2ae4, 0x00073ffe, 0x000022a2,
554 	0x240c, 0x000007ff, 0x00000000,
555 	0x8a14, 0xf000001f, 0x00000007,
556 	0x8b24, 0xffffffff, 0x00ffffff,
557 	0x8b10, 0x0000ff0f, 0x00000000,
558 	0x28a4c, 0x07ffffff, 0x4e000000,
559 	0x28350, 0x3f3f3fff, 0x00000000,
560 	0x30, 0x000000ff, 0x0040,
561 	0x34, 0x00000040, 0x00004040,
562 	0x9100, 0x03e00000, 0x03600000,
563 	0x9060, 0x0000007f, 0x00000020,
564 	0x9508, 0x00010000, 0x00010000,
565 	0xac14, 0x000003ff, 0x000000f1,
566 	0xac10, 0xffffffff, 0x00000000,
567 	0xac0c, 0xffffffff, 0x00003210,
568 	0x88d4, 0x0000001f, 0x00000010,
569 	0x15c0, 0x000c0fc0, 0x000c0400
570 };
571 
572 static const u32 hainan_golden_registers2[] =
573 {
574 	0x98f8, 0xffffffff, 0x02010001
575 };
576 
577 static const u32 tahiti_mgcg_cgcg_init[] =
578 {
579 	0xc400, 0xffffffff, 0xfffffffc,
580 	0x802c, 0xffffffff, 0xe0000000,
581 	0x9a60, 0xffffffff, 0x00000100,
582 	0x92a4, 0xffffffff, 0x00000100,
583 	0xc164, 0xffffffff, 0x00000100,
584 	0x9774, 0xffffffff, 0x00000100,
585 	0x8984, 0xffffffff, 0x06000100,
586 	0x8a18, 0xffffffff, 0x00000100,
587 	0x92a0, 0xffffffff, 0x00000100,
588 	0xc380, 0xffffffff, 0x00000100,
589 	0x8b28, 0xffffffff, 0x00000100,
590 	0x9144, 0xffffffff, 0x00000100,
591 	0x8d88, 0xffffffff, 0x00000100,
592 	0x8d8c, 0xffffffff, 0x00000100,
593 	0x9030, 0xffffffff, 0x00000100,
594 	0x9034, 0xffffffff, 0x00000100,
595 	0x9038, 0xffffffff, 0x00000100,
596 	0x903c, 0xffffffff, 0x00000100,
597 	0xad80, 0xffffffff, 0x00000100,
598 	0xac54, 0xffffffff, 0x00000100,
599 	0x897c, 0xffffffff, 0x06000100,
600 	0x9868, 0xffffffff, 0x00000100,
601 	0x9510, 0xffffffff, 0x00000100,
602 	0xaf04, 0xffffffff, 0x00000100,
603 	0xae04, 0xffffffff, 0x00000100,
604 	0x949c, 0xffffffff, 0x00000100,
605 	0x802c, 0xffffffff, 0xe0000000,
606 	0x9160, 0xffffffff, 0x00010000,
607 	0x9164, 0xffffffff, 0x00030002,
608 	0x9168, 0xffffffff, 0x00040007,
609 	0x916c, 0xffffffff, 0x00060005,
610 	0x9170, 0xffffffff, 0x00090008,
611 	0x9174, 0xffffffff, 0x00020001,
612 	0x9178, 0xffffffff, 0x00040003,
613 	0x917c, 0xffffffff, 0x00000007,
614 	0x9180, 0xffffffff, 0x00060005,
615 	0x9184, 0xffffffff, 0x00090008,
616 	0x9188, 0xffffffff, 0x00030002,
617 	0x918c, 0xffffffff, 0x00050004,
618 	0x9190, 0xffffffff, 0x00000008,
619 	0x9194, 0xffffffff, 0x00070006,
620 	0x9198, 0xffffffff, 0x000a0009,
621 	0x919c, 0xffffffff, 0x00040003,
622 	0x91a0, 0xffffffff, 0x00060005,
623 	0x91a4, 0xffffffff, 0x00000009,
624 	0x91a8, 0xffffffff, 0x00080007,
625 	0x91ac, 0xffffffff, 0x000b000a,
626 	0x91b0, 0xffffffff, 0x00050004,
627 	0x91b4, 0xffffffff, 0x00070006,
628 	0x91b8, 0xffffffff, 0x0008000b,
629 	0x91bc, 0xffffffff, 0x000a0009,
630 	0x91c0, 0xffffffff, 0x000d000c,
631 	0x91c4, 0xffffffff, 0x00060005,
632 	0x91c8, 0xffffffff, 0x00080007,
633 	0x91cc, 0xffffffff, 0x0000000b,
634 	0x91d0, 0xffffffff, 0x000a0009,
635 	0x91d4, 0xffffffff, 0x000d000c,
636 	0x91d8, 0xffffffff, 0x00070006,
637 	0x91dc, 0xffffffff, 0x00090008,
638 	0x91e0, 0xffffffff, 0x0000000c,
639 	0x91e4, 0xffffffff, 0x000b000a,
640 	0x91e8, 0xffffffff, 0x000e000d,
641 	0x91ec, 0xffffffff, 0x00080007,
642 	0x91f0, 0xffffffff, 0x000a0009,
643 	0x91f4, 0xffffffff, 0x0000000d,
644 	0x91f8, 0xffffffff, 0x000c000b,
645 	0x91fc, 0xffffffff, 0x000f000e,
646 	0x9200, 0xffffffff, 0x00090008,
647 	0x9204, 0xffffffff, 0x000b000a,
648 	0x9208, 0xffffffff, 0x000c000f,
649 	0x920c, 0xffffffff, 0x000e000d,
650 	0x9210, 0xffffffff, 0x00110010,
651 	0x9214, 0xffffffff, 0x000a0009,
652 	0x9218, 0xffffffff, 0x000c000b,
653 	0x921c, 0xffffffff, 0x0000000f,
654 	0x9220, 0xffffffff, 0x000e000d,
655 	0x9224, 0xffffffff, 0x00110010,
656 	0x9228, 0xffffffff, 0x000b000a,
657 	0x922c, 0xffffffff, 0x000d000c,
658 	0x9230, 0xffffffff, 0x00000010,
659 	0x9234, 0xffffffff, 0x000f000e,
660 	0x9238, 0xffffffff, 0x00120011,
661 	0x923c, 0xffffffff, 0x000c000b,
662 	0x9240, 0xffffffff, 0x000e000d,
663 	0x9244, 0xffffffff, 0x00000011,
664 	0x9248, 0xffffffff, 0x0010000f,
665 	0x924c, 0xffffffff, 0x00130012,
666 	0x9250, 0xffffffff, 0x000d000c,
667 	0x9254, 0xffffffff, 0x000f000e,
668 	0x9258, 0xffffffff, 0x00100013,
669 	0x925c, 0xffffffff, 0x00120011,
670 	0x9260, 0xffffffff, 0x00150014,
671 	0x9264, 0xffffffff, 0x000e000d,
672 	0x9268, 0xffffffff, 0x0010000f,
673 	0x926c, 0xffffffff, 0x00000013,
674 	0x9270, 0xffffffff, 0x00120011,
675 	0x9274, 0xffffffff, 0x00150014,
676 	0x9278, 0xffffffff, 0x000f000e,
677 	0x927c, 0xffffffff, 0x00110010,
678 	0x9280, 0xffffffff, 0x00000014,
679 	0x9284, 0xffffffff, 0x00130012,
680 	0x9288, 0xffffffff, 0x00160015,
681 	0x928c, 0xffffffff, 0x0010000f,
682 	0x9290, 0xffffffff, 0x00120011,
683 	0x9294, 0xffffffff, 0x00000015,
684 	0x9298, 0xffffffff, 0x00140013,
685 	0x929c, 0xffffffff, 0x00170016,
686 	0x9150, 0xffffffff, 0x96940200,
687 	0x8708, 0xffffffff, 0x00900100,
688 	0xc478, 0xffffffff, 0x00000080,
689 	0xc404, 0xffffffff, 0x0020003f,
690 	0x30, 0xffffffff, 0x0000001c,
691 	0x34, 0x000f0000, 0x000f0000,
692 	0x160c, 0xffffffff, 0x00000100,
693 	0x1024, 0xffffffff, 0x00000100,
694 	0x102c, 0x00000101, 0x00000000,
695 	0x20a8, 0xffffffff, 0x00000104,
696 	0x264c, 0x000c0000, 0x000c0000,
697 	0x2648, 0x000c0000, 0x000c0000,
698 	0x55e4, 0xff000fff, 0x00000100,
699 	0x55e8, 0x00000001, 0x00000001,
700 	0x2f50, 0x00000001, 0x00000001,
701 	0x30cc, 0xc0000fff, 0x00000104,
702 	0xc1e4, 0x00000001, 0x00000001,
703 	0xd0c0, 0xfffffff0, 0x00000100,
704 	0xd8c0, 0xfffffff0, 0x00000100
705 };
706 
707 static const u32 pitcairn_mgcg_cgcg_init[] =
708 {
709 	0xc400, 0xffffffff, 0xfffffffc,
710 	0x802c, 0xffffffff, 0xe0000000,
711 	0x9a60, 0xffffffff, 0x00000100,
712 	0x92a4, 0xffffffff, 0x00000100,
713 	0xc164, 0xffffffff, 0x00000100,
714 	0x9774, 0xffffffff, 0x00000100,
715 	0x8984, 0xffffffff, 0x06000100,
716 	0x8a18, 0xffffffff, 0x00000100,
717 	0x92a0, 0xffffffff, 0x00000100,
718 	0xc380, 0xffffffff, 0x00000100,
719 	0x8b28, 0xffffffff, 0x00000100,
720 	0x9144, 0xffffffff, 0x00000100,
721 	0x8d88, 0xffffffff, 0x00000100,
722 	0x8d8c, 0xffffffff, 0x00000100,
723 	0x9030, 0xffffffff, 0x00000100,
724 	0x9034, 0xffffffff, 0x00000100,
725 	0x9038, 0xffffffff, 0x00000100,
726 	0x903c, 0xffffffff, 0x00000100,
727 	0xad80, 0xffffffff, 0x00000100,
728 	0xac54, 0xffffffff, 0x00000100,
729 	0x897c, 0xffffffff, 0x06000100,
730 	0x9868, 0xffffffff, 0x00000100,
731 	0x9510, 0xffffffff, 0x00000100,
732 	0xaf04, 0xffffffff, 0x00000100,
733 	0xae04, 0xffffffff, 0x00000100,
734 	0x949c, 0xffffffff, 0x00000100,
735 	0x802c, 0xffffffff, 0xe0000000,
736 	0x9160, 0xffffffff, 0x00010000,
737 	0x9164, 0xffffffff, 0x00030002,
738 	0x9168, 0xffffffff, 0x00040007,
739 	0x916c, 0xffffffff, 0x00060005,
740 	0x9170, 0xffffffff, 0x00090008,
741 	0x9174, 0xffffffff, 0x00020001,
742 	0x9178, 0xffffffff, 0x00040003,
743 	0x917c, 0xffffffff, 0x00000007,
744 	0x9180, 0xffffffff, 0x00060005,
745 	0x9184, 0xffffffff, 0x00090008,
746 	0x9188, 0xffffffff, 0x00030002,
747 	0x918c, 0xffffffff, 0x00050004,
748 	0x9190, 0xffffffff, 0x00000008,
749 	0x9194, 0xffffffff, 0x00070006,
750 	0x9198, 0xffffffff, 0x000a0009,
751 	0x919c, 0xffffffff, 0x00040003,
752 	0x91a0, 0xffffffff, 0x00060005,
753 	0x91a4, 0xffffffff, 0x00000009,
754 	0x91a8, 0xffffffff, 0x00080007,
755 	0x91ac, 0xffffffff, 0x000b000a,
756 	0x91b0, 0xffffffff, 0x00050004,
757 	0x91b4, 0xffffffff, 0x00070006,
758 	0x91b8, 0xffffffff, 0x0008000b,
759 	0x91bc, 0xffffffff, 0x000a0009,
760 	0x91c0, 0xffffffff, 0x000d000c,
761 	0x9200, 0xffffffff, 0x00090008,
762 	0x9204, 0xffffffff, 0x000b000a,
763 	0x9208, 0xffffffff, 0x000c000f,
764 	0x920c, 0xffffffff, 0x000e000d,
765 	0x9210, 0xffffffff, 0x00110010,
766 	0x9214, 0xffffffff, 0x000a0009,
767 	0x9218, 0xffffffff, 0x000c000b,
768 	0x921c, 0xffffffff, 0x0000000f,
769 	0x9220, 0xffffffff, 0x000e000d,
770 	0x9224, 0xffffffff, 0x00110010,
771 	0x9228, 0xffffffff, 0x000b000a,
772 	0x922c, 0xffffffff, 0x000d000c,
773 	0x9230, 0xffffffff, 0x00000010,
774 	0x9234, 0xffffffff, 0x000f000e,
775 	0x9238, 0xffffffff, 0x00120011,
776 	0x923c, 0xffffffff, 0x000c000b,
777 	0x9240, 0xffffffff, 0x000e000d,
778 	0x9244, 0xffffffff, 0x00000011,
779 	0x9248, 0xffffffff, 0x0010000f,
780 	0x924c, 0xffffffff, 0x00130012,
781 	0x9250, 0xffffffff, 0x000d000c,
782 	0x9254, 0xffffffff, 0x000f000e,
783 	0x9258, 0xffffffff, 0x00100013,
784 	0x925c, 0xffffffff, 0x00120011,
785 	0x9260, 0xffffffff, 0x00150014,
786 	0x9150, 0xffffffff, 0x96940200,
787 	0x8708, 0xffffffff, 0x00900100,
788 	0xc478, 0xffffffff, 0x00000080,
789 	0xc404, 0xffffffff, 0x0020003f,
790 	0x30, 0xffffffff, 0x0000001c,
791 	0x34, 0x000f0000, 0x000f0000,
792 	0x160c, 0xffffffff, 0x00000100,
793 	0x1024, 0xffffffff, 0x00000100,
794 	0x102c, 0x00000101, 0x00000000,
795 	0x20a8, 0xffffffff, 0x00000104,
796 	0x55e4, 0xff000fff, 0x00000100,
797 	0x55e8, 0x00000001, 0x00000001,
798 	0x2f50, 0x00000001, 0x00000001,
799 	0x30cc, 0xc0000fff, 0x00000104,
800 	0xc1e4, 0x00000001, 0x00000001,
801 	0xd0c0, 0xfffffff0, 0x00000100,
802 	0xd8c0, 0xfffffff0, 0x00000100
803 };
804 
805 static const u32 verde_mgcg_cgcg_init[] =
806 {
807 	0xc400, 0xffffffff, 0xfffffffc,
808 	0x802c, 0xffffffff, 0xe0000000,
809 	0x9a60, 0xffffffff, 0x00000100,
810 	0x92a4, 0xffffffff, 0x00000100,
811 	0xc164, 0xffffffff, 0x00000100,
812 	0x9774, 0xffffffff, 0x00000100,
813 	0x8984, 0xffffffff, 0x06000100,
814 	0x8a18, 0xffffffff, 0x00000100,
815 	0x92a0, 0xffffffff, 0x00000100,
816 	0xc380, 0xffffffff, 0x00000100,
817 	0x8b28, 0xffffffff, 0x00000100,
818 	0x9144, 0xffffffff, 0x00000100,
819 	0x8d88, 0xffffffff, 0x00000100,
820 	0x8d8c, 0xffffffff, 0x00000100,
821 	0x9030, 0xffffffff, 0x00000100,
822 	0x9034, 0xffffffff, 0x00000100,
823 	0x9038, 0xffffffff, 0x00000100,
824 	0x903c, 0xffffffff, 0x00000100,
825 	0xad80, 0xffffffff, 0x00000100,
826 	0xac54, 0xffffffff, 0x00000100,
827 	0x897c, 0xffffffff, 0x06000100,
828 	0x9868, 0xffffffff, 0x00000100,
829 	0x9510, 0xffffffff, 0x00000100,
830 	0xaf04, 0xffffffff, 0x00000100,
831 	0xae04, 0xffffffff, 0x00000100,
832 	0x949c, 0xffffffff, 0x00000100,
833 	0x802c, 0xffffffff, 0xe0000000,
834 	0x9160, 0xffffffff, 0x00010000,
835 	0x9164, 0xffffffff, 0x00030002,
836 	0x9168, 0xffffffff, 0x00040007,
837 	0x916c, 0xffffffff, 0x00060005,
838 	0x9170, 0xffffffff, 0x00090008,
839 	0x9174, 0xffffffff, 0x00020001,
840 	0x9178, 0xffffffff, 0x00040003,
841 	0x917c, 0xffffffff, 0x00000007,
842 	0x9180, 0xffffffff, 0x00060005,
843 	0x9184, 0xffffffff, 0x00090008,
844 	0x9188, 0xffffffff, 0x00030002,
845 	0x918c, 0xffffffff, 0x00050004,
846 	0x9190, 0xffffffff, 0x00000008,
847 	0x9194, 0xffffffff, 0x00070006,
848 	0x9198, 0xffffffff, 0x000a0009,
849 	0x919c, 0xffffffff, 0x00040003,
850 	0x91a0, 0xffffffff, 0x00060005,
851 	0x91a4, 0xffffffff, 0x00000009,
852 	0x91a8, 0xffffffff, 0x00080007,
853 	0x91ac, 0xffffffff, 0x000b000a,
854 	0x91b0, 0xffffffff, 0x00050004,
855 	0x91b4, 0xffffffff, 0x00070006,
856 	0x91b8, 0xffffffff, 0x0008000b,
857 	0x91bc, 0xffffffff, 0x000a0009,
858 	0x91c0, 0xffffffff, 0x000d000c,
859 	0x9200, 0xffffffff, 0x00090008,
860 	0x9204, 0xffffffff, 0x000b000a,
861 	0x9208, 0xffffffff, 0x000c000f,
862 	0x920c, 0xffffffff, 0x000e000d,
863 	0x9210, 0xffffffff, 0x00110010,
864 	0x9214, 0xffffffff, 0x000a0009,
865 	0x9218, 0xffffffff, 0x000c000b,
866 	0x921c, 0xffffffff, 0x0000000f,
867 	0x9220, 0xffffffff, 0x000e000d,
868 	0x9224, 0xffffffff, 0x00110010,
869 	0x9228, 0xffffffff, 0x000b000a,
870 	0x922c, 0xffffffff, 0x000d000c,
871 	0x9230, 0xffffffff, 0x00000010,
872 	0x9234, 0xffffffff, 0x000f000e,
873 	0x9238, 0xffffffff, 0x00120011,
874 	0x923c, 0xffffffff, 0x000c000b,
875 	0x9240, 0xffffffff, 0x000e000d,
876 	0x9244, 0xffffffff, 0x00000011,
877 	0x9248, 0xffffffff, 0x0010000f,
878 	0x924c, 0xffffffff, 0x00130012,
879 	0x9250, 0xffffffff, 0x000d000c,
880 	0x9254, 0xffffffff, 0x000f000e,
881 	0x9258, 0xffffffff, 0x00100013,
882 	0x925c, 0xffffffff, 0x00120011,
883 	0x9260, 0xffffffff, 0x00150014,
884 	0x9150, 0xffffffff, 0x96940200,
885 	0x8708, 0xffffffff, 0x00900100,
886 	0xc478, 0xffffffff, 0x00000080,
887 	0xc404, 0xffffffff, 0x0020003f,
888 	0x30, 0xffffffff, 0x0000001c,
889 	0x34, 0x000f0000, 0x000f0000,
890 	0x160c, 0xffffffff, 0x00000100,
891 	0x1024, 0xffffffff, 0x00000100,
892 	0x102c, 0x00000101, 0x00000000,
893 	0x20a8, 0xffffffff, 0x00000104,
894 	0x264c, 0x000c0000, 0x000c0000,
895 	0x2648, 0x000c0000, 0x000c0000,
896 	0x55e4, 0xff000fff, 0x00000100,
897 	0x55e8, 0x00000001, 0x00000001,
898 	0x2f50, 0x00000001, 0x00000001,
899 	0x30cc, 0xc0000fff, 0x00000104,
900 	0xc1e4, 0x00000001, 0x00000001,
901 	0xd0c0, 0xfffffff0, 0x00000100,
902 	0xd8c0, 0xfffffff0, 0x00000100
903 };
904 
905 static const u32 oland_mgcg_cgcg_init[] =
906 {
907 	0xc400, 0xffffffff, 0xfffffffc,
908 	0x802c, 0xffffffff, 0xe0000000,
909 	0x9a60, 0xffffffff, 0x00000100,
910 	0x92a4, 0xffffffff, 0x00000100,
911 	0xc164, 0xffffffff, 0x00000100,
912 	0x9774, 0xffffffff, 0x00000100,
913 	0x8984, 0xffffffff, 0x06000100,
914 	0x8a18, 0xffffffff, 0x00000100,
915 	0x92a0, 0xffffffff, 0x00000100,
916 	0xc380, 0xffffffff, 0x00000100,
917 	0x8b28, 0xffffffff, 0x00000100,
918 	0x9144, 0xffffffff, 0x00000100,
919 	0x8d88, 0xffffffff, 0x00000100,
920 	0x8d8c, 0xffffffff, 0x00000100,
921 	0x9030, 0xffffffff, 0x00000100,
922 	0x9034, 0xffffffff, 0x00000100,
923 	0x9038, 0xffffffff, 0x00000100,
924 	0x903c, 0xffffffff, 0x00000100,
925 	0xad80, 0xffffffff, 0x00000100,
926 	0xac54, 0xffffffff, 0x00000100,
927 	0x897c, 0xffffffff, 0x06000100,
928 	0x9868, 0xffffffff, 0x00000100,
929 	0x9510, 0xffffffff, 0x00000100,
930 	0xaf04, 0xffffffff, 0x00000100,
931 	0xae04, 0xffffffff, 0x00000100,
932 	0x949c, 0xffffffff, 0x00000100,
933 	0x802c, 0xffffffff, 0xe0000000,
934 	0x9160, 0xffffffff, 0x00010000,
935 	0x9164, 0xffffffff, 0x00030002,
936 	0x9168, 0xffffffff, 0x00040007,
937 	0x916c, 0xffffffff, 0x00060005,
938 	0x9170, 0xffffffff, 0x00090008,
939 	0x9174, 0xffffffff, 0x00020001,
940 	0x9178, 0xffffffff, 0x00040003,
941 	0x917c, 0xffffffff, 0x00000007,
942 	0x9180, 0xffffffff, 0x00060005,
943 	0x9184, 0xffffffff, 0x00090008,
944 	0x9188, 0xffffffff, 0x00030002,
945 	0x918c, 0xffffffff, 0x00050004,
946 	0x9190, 0xffffffff, 0x00000008,
947 	0x9194, 0xffffffff, 0x00070006,
948 	0x9198, 0xffffffff, 0x000a0009,
949 	0x919c, 0xffffffff, 0x00040003,
950 	0x91a0, 0xffffffff, 0x00060005,
951 	0x91a4, 0xffffffff, 0x00000009,
952 	0x91a8, 0xffffffff, 0x00080007,
953 	0x91ac, 0xffffffff, 0x000b000a,
954 	0x91b0, 0xffffffff, 0x00050004,
955 	0x91b4, 0xffffffff, 0x00070006,
956 	0x91b8, 0xffffffff, 0x0008000b,
957 	0x91bc, 0xffffffff, 0x000a0009,
958 	0x91c0, 0xffffffff, 0x000d000c,
959 	0x91c4, 0xffffffff, 0x00060005,
960 	0x91c8, 0xffffffff, 0x00080007,
961 	0x91cc, 0xffffffff, 0x0000000b,
962 	0x91d0, 0xffffffff, 0x000a0009,
963 	0x91d4, 0xffffffff, 0x000d000c,
964 	0x9150, 0xffffffff, 0x96940200,
965 	0x8708, 0xffffffff, 0x00900100,
966 	0xc478, 0xffffffff, 0x00000080,
967 	0xc404, 0xffffffff, 0x0020003f,
968 	0x30, 0xffffffff, 0x0000001c,
969 	0x34, 0x000f0000, 0x000f0000,
970 	0x160c, 0xffffffff, 0x00000100,
971 	0x1024, 0xffffffff, 0x00000100,
972 	0x102c, 0x00000101, 0x00000000,
973 	0x20a8, 0xffffffff, 0x00000104,
974 	0x264c, 0x000c0000, 0x000c0000,
975 	0x2648, 0x000c0000, 0x000c0000,
976 	0x55e4, 0xff000fff, 0x00000100,
977 	0x55e8, 0x00000001, 0x00000001,
978 	0x2f50, 0x00000001, 0x00000001,
979 	0x30cc, 0xc0000fff, 0x00000104,
980 	0xc1e4, 0x00000001, 0x00000001,
981 	0xd0c0, 0xfffffff0, 0x00000100,
982 	0xd8c0, 0xfffffff0, 0x00000100
983 };
984 
985 static const u32 hainan_mgcg_cgcg_init[] =
986 {
987 	0xc400, 0xffffffff, 0xfffffffc,
988 	0x802c, 0xffffffff, 0xe0000000,
989 	0x9a60, 0xffffffff, 0x00000100,
990 	0x92a4, 0xffffffff, 0x00000100,
991 	0xc164, 0xffffffff, 0x00000100,
992 	0x9774, 0xffffffff, 0x00000100,
993 	0x8984, 0xffffffff, 0x06000100,
994 	0x8a18, 0xffffffff, 0x00000100,
995 	0x92a0, 0xffffffff, 0x00000100,
996 	0xc380, 0xffffffff, 0x00000100,
997 	0x8b28, 0xffffffff, 0x00000100,
998 	0x9144, 0xffffffff, 0x00000100,
999 	0x8d88, 0xffffffff, 0x00000100,
1000 	0x8d8c, 0xffffffff, 0x00000100,
1001 	0x9030, 0xffffffff, 0x00000100,
1002 	0x9034, 0xffffffff, 0x00000100,
1003 	0x9038, 0xffffffff, 0x00000100,
1004 	0x903c, 0xffffffff, 0x00000100,
1005 	0xad80, 0xffffffff, 0x00000100,
1006 	0xac54, 0xffffffff, 0x00000100,
1007 	0x897c, 0xffffffff, 0x06000100,
1008 	0x9868, 0xffffffff, 0x00000100,
1009 	0x9510, 0xffffffff, 0x00000100,
1010 	0xaf04, 0xffffffff, 0x00000100,
1011 	0xae04, 0xffffffff, 0x00000100,
1012 	0x949c, 0xffffffff, 0x00000100,
1013 	0x802c, 0xffffffff, 0xe0000000,
1014 	0x9160, 0xffffffff, 0x00010000,
1015 	0x9164, 0xffffffff, 0x00030002,
1016 	0x9168, 0xffffffff, 0x00040007,
1017 	0x916c, 0xffffffff, 0x00060005,
1018 	0x9170, 0xffffffff, 0x00090008,
1019 	0x9174, 0xffffffff, 0x00020001,
1020 	0x9178, 0xffffffff, 0x00040003,
1021 	0x917c, 0xffffffff, 0x00000007,
1022 	0x9180, 0xffffffff, 0x00060005,
1023 	0x9184, 0xffffffff, 0x00090008,
1024 	0x9188, 0xffffffff, 0x00030002,
1025 	0x918c, 0xffffffff, 0x00050004,
1026 	0x9190, 0xffffffff, 0x00000008,
1027 	0x9194, 0xffffffff, 0x00070006,
1028 	0x9198, 0xffffffff, 0x000a0009,
1029 	0x919c, 0xffffffff, 0x00040003,
1030 	0x91a0, 0xffffffff, 0x00060005,
1031 	0x91a4, 0xffffffff, 0x00000009,
1032 	0x91a8, 0xffffffff, 0x00080007,
1033 	0x91ac, 0xffffffff, 0x000b000a,
1034 	0x91b0, 0xffffffff, 0x00050004,
1035 	0x91b4, 0xffffffff, 0x00070006,
1036 	0x91b8, 0xffffffff, 0x0008000b,
1037 	0x91bc, 0xffffffff, 0x000a0009,
1038 	0x91c0, 0xffffffff, 0x000d000c,
1039 	0x91c4, 0xffffffff, 0x00060005,
1040 	0x91c8, 0xffffffff, 0x00080007,
1041 	0x91cc, 0xffffffff, 0x0000000b,
1042 	0x91d0, 0xffffffff, 0x000a0009,
1043 	0x91d4, 0xffffffff, 0x000d000c,
1044 	0x9150, 0xffffffff, 0x96940200,
1045 	0x8708, 0xffffffff, 0x00900100,
1046 	0xc478, 0xffffffff, 0x00000080,
1047 	0xc404, 0xffffffff, 0x0020003f,
1048 	0x30, 0xffffffff, 0x0000001c,
1049 	0x34, 0x000f0000, 0x000f0000,
1050 	0x160c, 0xffffffff, 0x00000100,
1051 	0x1024, 0xffffffff, 0x00000100,
1052 	0x20a8, 0xffffffff, 0x00000104,
1053 	0x264c, 0x000c0000, 0x000c0000,
1054 	0x2648, 0x000c0000, 0x000c0000,
1055 	0x2f50, 0x00000001, 0x00000001,
1056 	0x30cc, 0xc0000fff, 0x00000104,
1057 	0xc1e4, 0x00000001, 0x00000001,
1058 	0xd0c0, 0xfffffff0, 0x00000100,
1059 	0xd8c0, 0xfffffff0, 0x00000100
1060 };
1061 
1062 static const u32 verde_pg_init[] =
1063 {
1064 	0x353c, 0xffffffff, 0x40000,
1065 	0x3538, 0xffffffff, 0x200010ff,
1066 	0x353c, 0xffffffff, 0x0,
1067 	0x353c, 0xffffffff, 0x0,
1068 	0x353c, 0xffffffff, 0x0,
1069 	0x353c, 0xffffffff, 0x0,
1070 	0x353c, 0xffffffff, 0x0,
1071 	0x353c, 0xffffffff, 0x7007,
1072 	0x3538, 0xffffffff, 0x300010ff,
1073 	0x353c, 0xffffffff, 0x0,
1074 	0x353c, 0xffffffff, 0x0,
1075 	0x353c, 0xffffffff, 0x0,
1076 	0x353c, 0xffffffff, 0x0,
1077 	0x353c, 0xffffffff, 0x0,
1078 	0x353c, 0xffffffff, 0x400000,
1079 	0x3538, 0xffffffff, 0x100010ff,
1080 	0x353c, 0xffffffff, 0x0,
1081 	0x353c, 0xffffffff, 0x0,
1082 	0x353c, 0xffffffff, 0x0,
1083 	0x353c, 0xffffffff, 0x0,
1084 	0x353c, 0xffffffff, 0x0,
1085 	0x353c, 0xffffffff, 0x120200,
1086 	0x3538, 0xffffffff, 0x500010ff,
1087 	0x353c, 0xffffffff, 0x0,
1088 	0x353c, 0xffffffff, 0x0,
1089 	0x353c, 0xffffffff, 0x0,
1090 	0x353c, 0xffffffff, 0x0,
1091 	0x353c, 0xffffffff, 0x0,
1092 	0x353c, 0xffffffff, 0x1e1e16,
1093 	0x3538, 0xffffffff, 0x600010ff,
1094 	0x353c, 0xffffffff, 0x0,
1095 	0x353c, 0xffffffff, 0x0,
1096 	0x353c, 0xffffffff, 0x0,
1097 	0x353c, 0xffffffff, 0x0,
1098 	0x353c, 0xffffffff, 0x0,
1099 	0x353c, 0xffffffff, 0x171f1e,
1100 	0x3538, 0xffffffff, 0x700010ff,
1101 	0x353c, 0xffffffff, 0x0,
1102 	0x353c, 0xffffffff, 0x0,
1103 	0x353c, 0xffffffff, 0x0,
1104 	0x353c, 0xffffffff, 0x0,
1105 	0x353c, 0xffffffff, 0x0,
1106 	0x353c, 0xffffffff, 0x0,
1107 	0x3538, 0xffffffff, 0x9ff,
1108 	0x3500, 0xffffffff, 0x0,
1109 	0x3504, 0xffffffff, 0x10000800,
1110 	0x3504, 0xffffffff, 0xf,
1111 	0x3504, 0xffffffff, 0xf,
1112 	0x3500, 0xffffffff, 0x4,
1113 	0x3504, 0xffffffff, 0x1000051e,
1114 	0x3504, 0xffffffff, 0xffff,
1115 	0x3504, 0xffffffff, 0xffff,
1116 	0x3500, 0xffffffff, 0x8,
1117 	0x3504, 0xffffffff, 0x80500,
1118 	0x3500, 0xffffffff, 0x12,
1119 	0x3504, 0xffffffff, 0x9050c,
1120 	0x3500, 0xffffffff, 0x1d,
1121 	0x3504, 0xffffffff, 0xb052c,
1122 	0x3500, 0xffffffff, 0x2a,
1123 	0x3504, 0xffffffff, 0x1053e,
1124 	0x3500, 0xffffffff, 0x2d,
1125 	0x3504, 0xffffffff, 0x10546,
1126 	0x3500, 0xffffffff, 0x30,
1127 	0x3504, 0xffffffff, 0xa054e,
1128 	0x3500, 0xffffffff, 0x3c,
1129 	0x3504, 0xffffffff, 0x1055f,
1130 	0x3500, 0xffffffff, 0x3f,
1131 	0x3504, 0xffffffff, 0x10567,
1132 	0x3500, 0xffffffff, 0x42,
1133 	0x3504, 0xffffffff, 0x1056f,
1134 	0x3500, 0xffffffff, 0x45,
1135 	0x3504, 0xffffffff, 0x10572,
1136 	0x3500, 0xffffffff, 0x48,
1137 	0x3504, 0xffffffff, 0x20575,
1138 	0x3500, 0xffffffff, 0x4c,
1139 	0x3504, 0xffffffff, 0x190801,
1140 	0x3500, 0xffffffff, 0x67,
1141 	0x3504, 0xffffffff, 0x1082a,
1142 	0x3500, 0xffffffff, 0x6a,
1143 	0x3504, 0xffffffff, 0x1b082d,
1144 	0x3500, 0xffffffff, 0x87,
1145 	0x3504, 0xffffffff, 0x310851,
1146 	0x3500, 0xffffffff, 0xba,
1147 	0x3504, 0xffffffff, 0x891,
1148 	0x3500, 0xffffffff, 0xbc,
1149 	0x3504, 0xffffffff, 0x893,
1150 	0x3500, 0xffffffff, 0xbe,
1151 	0x3504, 0xffffffff, 0x20895,
1152 	0x3500, 0xffffffff, 0xc2,
1153 	0x3504, 0xffffffff, 0x20899,
1154 	0x3500, 0xffffffff, 0xc6,
1155 	0x3504, 0xffffffff, 0x2089d,
1156 	0x3500, 0xffffffff, 0xca,
1157 	0x3504, 0xffffffff, 0x8a1,
1158 	0x3500, 0xffffffff, 0xcc,
1159 	0x3504, 0xffffffff, 0x8a3,
1160 	0x3500, 0xffffffff, 0xce,
1161 	0x3504, 0xffffffff, 0x308a5,
1162 	0x3500, 0xffffffff, 0xd3,
1163 	0x3504, 0xffffffff, 0x6d08cd,
1164 	0x3500, 0xffffffff, 0x142,
1165 	0x3504, 0xffffffff, 0x2000095a,
1166 	0x3504, 0xffffffff, 0x1,
1167 	0x3500, 0xffffffff, 0x144,
1168 	0x3504, 0xffffffff, 0x301f095b,
1169 	0x3500, 0xffffffff, 0x165,
1170 	0x3504, 0xffffffff, 0xc094d,
1171 	0x3500, 0xffffffff, 0x173,
1172 	0x3504, 0xffffffff, 0xf096d,
1173 	0x3500, 0xffffffff, 0x184,
1174 	0x3504, 0xffffffff, 0x15097f,
1175 	0x3500, 0xffffffff, 0x19b,
1176 	0x3504, 0xffffffff, 0xc0998,
1177 	0x3500, 0xffffffff, 0x1a9,
1178 	0x3504, 0xffffffff, 0x409a7,
1179 	0x3500, 0xffffffff, 0x1af,
1180 	0x3504, 0xffffffff, 0xcdc,
1181 	0x3500, 0xffffffff, 0x1b1,
1182 	0x3504, 0xffffffff, 0x800,
1183 	0x3508, 0xffffffff, 0x6c9b2000,
1184 	0x3510, 0xfc00, 0x2000,
1185 	0x3544, 0xffffffff, 0xfc0,
1186 	0x28d4, 0x00000100, 0x100
1187 };
1188 
1189 static void si_init_golden_registers(struct radeon_device *rdev)
1190 {
1191 	switch (rdev->family) {
1192 	case CHIP_TAHITI:
1193 		radeon_program_register_sequence(rdev,
1194 						 tahiti_golden_registers,
1195 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1196 		radeon_program_register_sequence(rdev,
1197 						 tahiti_golden_rlc_registers,
1198 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1199 		radeon_program_register_sequence(rdev,
1200 						 tahiti_mgcg_cgcg_init,
1201 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1202 		radeon_program_register_sequence(rdev,
1203 						 tahiti_golden_registers2,
1204 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1205 		break;
1206 	case CHIP_PITCAIRN:
1207 		radeon_program_register_sequence(rdev,
1208 						 pitcairn_golden_registers,
1209 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1210 		radeon_program_register_sequence(rdev,
1211 						 pitcairn_golden_rlc_registers,
1212 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1213 		radeon_program_register_sequence(rdev,
1214 						 pitcairn_mgcg_cgcg_init,
1215 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1216 		break;
1217 	case CHIP_VERDE:
1218 		radeon_program_register_sequence(rdev,
1219 						 verde_golden_registers,
1220 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1221 		radeon_program_register_sequence(rdev,
1222 						 verde_golden_rlc_registers,
1223 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1224 		radeon_program_register_sequence(rdev,
1225 						 verde_mgcg_cgcg_init,
1226 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1227 		radeon_program_register_sequence(rdev,
1228 						 verde_pg_init,
1229 						 (const u32)ARRAY_SIZE(verde_pg_init));
1230 		break;
1231 	case CHIP_OLAND:
1232 		radeon_program_register_sequence(rdev,
1233 						 oland_golden_registers,
1234 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1235 		radeon_program_register_sequence(rdev,
1236 						 oland_golden_rlc_registers,
1237 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1238 		radeon_program_register_sequence(rdev,
1239 						 oland_mgcg_cgcg_init,
1240 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1241 		break;
1242 	case CHIP_HAINAN:
1243 		radeon_program_register_sequence(rdev,
1244 						 hainan_golden_registers,
1245 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1246 		radeon_program_register_sequence(rdev,
1247 						 hainan_golden_registers2,
1248 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1249 		radeon_program_register_sequence(rdev,
1250 						 hainan_mgcg_cgcg_init,
1251 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1252 		break;
1253 	default:
1254 		break;
1255 	}
1256 }
1257 
1258 /**
1259  * si_get_allowed_info_register - fetch the register for the info ioctl
1260  *
1261  * @rdev: radeon_device pointer
1262  * @reg: register offset in bytes
1263  * @val: register value
1264  *
1265  * Returns 0 for success or -EINVAL for an invalid register
1266  *
1267  */
1268 int si_get_allowed_info_register(struct radeon_device *rdev,
1269 				 u32 reg, u32 *val)
1270 {
1271 	switch (reg) {
1272 	case GRBM_STATUS:
1273 	case GRBM_STATUS2:
1274 	case GRBM_STATUS_SE0:
1275 	case GRBM_STATUS_SE1:
1276 	case SRBM_STATUS:
1277 	case SRBM_STATUS2:
1278 	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1279 	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1280 	case UVD_STATUS:
1281 		*val = RREG32(reg);
1282 		return 0;
1283 	default:
1284 		return -EINVAL;
1285 	}
1286 }
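
/*
 * Example (sketch): this is reached from the RADEON_INFO_READ_REG ioctl
 * through the asic callback table, along the lines of:
 *
 *	u32 val;
 *
 *	if (si_get_allowed_info_register(rdev, GRBM_STATUS, &val) == 0)
 *		DRM_INFO("GRBM_STATUS: 0x%08x\n", val);
 */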
1287 
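/*
 * Clocks in this driver are expressed in 10 kHz units, so PCIE_BUS_CLK
 * (10000) is 100 MHz and TCLK is 10 MHz; a typical ATOM reference_freq of
 * 2700 likewise means a 27 MHz crystal.
 */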
1288 #define PCIE_BUS_CLK                10000
1289 #define TCLK                        (PCIE_BUS_CLK / 10)
1290 
1291 /**
1292  * si_get_xclk - get the xclk
1293  *
1294  * @rdev: radeon_device pointer
1295  *
1296  * Returns the reference clock used by the gfx engine
1297  * (SI).
1298  */
1299 u32 si_get_xclk(struct radeon_device *rdev)
1300 {
1301 	u32 reference_clock = rdev->clock.spll.reference_freq;
1302 	u32 tmp;
1303 
1304 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1305 	if (tmp & MUX_TCLK_TO_XCLK)
1306 		return TCLK;
1307 
1308 	tmp = RREG32(CG_CLKPIN_CNTL);
1309 	if (tmp & XTALIN_DIVIDE)
1310 		return reference_clock / 4;
1311 
1312 	return reference_clock;
1313 }
1314 
1315 /* get temperature in millidegrees */
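/*
 * Example: a raw CTF_TEMP field of 0x3f yields 63 * 1000 = 63000
 * millidegrees (63 C); if bit 9 (0x200) of the field is set, the reading
 * is clamped to the 255 C maximum.
 */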
1316 int si_get_temp(struct radeon_device *rdev)
1317 {
1318 	u32 temp;
1319 	int actual_temp = 0;
1320 
1321 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1322 		CTF_TEMP_SHIFT;
1323 
1324 	if (temp & 0x200)
1325 		actual_temp = 255;
1326 	else
1327 		actual_temp = temp & 0x1ff;
1328 
1329 	actual_temp *= 1000;
1330 
1331 	return actual_temp;
1332 }
1333 
1334 #define TAHITI_IO_MC_REGS_SIZE 36
1335 
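/*
 * Each *_io_mc_regs table below is a list of { MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA } pairs; si_mc_load_microcode() writes them out
 * verbatim before uploading the MC ucode proper.
 */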
1336 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1337 	{0x0000006f, 0x03044000},
1338 	{0x00000070, 0x0480c018},
1339 	{0x00000071, 0x00000040},
1340 	{0x00000072, 0x01000000},
1341 	{0x00000074, 0x000000ff},
1342 	{0x00000075, 0x00143400},
1343 	{0x00000076, 0x08ec0800},
1344 	{0x00000077, 0x040000cc},
1345 	{0x00000079, 0x00000000},
1346 	{0x0000007a, 0x21000409},
1347 	{0x0000007c, 0x00000000},
1348 	{0x0000007d, 0xe8000000},
1349 	{0x0000007e, 0x044408a8},
1350 	{0x0000007f, 0x00000003},
1351 	{0x00000080, 0x00000000},
1352 	{0x00000081, 0x01000000},
1353 	{0x00000082, 0x02000000},
1354 	{0x00000083, 0x00000000},
1355 	{0x00000084, 0xe3f3e4f4},
1356 	{0x00000085, 0x00052024},
1357 	{0x00000087, 0x00000000},
1358 	{0x00000088, 0x66036603},
1359 	{0x00000089, 0x01000000},
1360 	{0x0000008b, 0x1c0a0000},
1361 	{0x0000008c, 0xff010000},
1362 	{0x0000008e, 0xffffefff},
1363 	{0x0000008f, 0xfff3efff},
1364 	{0x00000090, 0xfff3efbf},
1365 	{0x00000094, 0x00101101},
1366 	{0x00000095, 0x00000fff},
1367 	{0x00000096, 0x00116fff},
1368 	{0x00000097, 0x60010000},
1369 	{0x00000098, 0x10010000},
1370 	{0x00000099, 0x00006000},
1371 	{0x0000009a, 0x00001000},
1372 	{0x0000009f, 0x00a77400}
1373 };
1374 
1375 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1376 	{0x0000006f, 0x03044000},
1377 	{0x00000070, 0x0480c018},
1378 	{0x00000071, 0x00000040},
1379 	{0x00000072, 0x01000000},
1380 	{0x00000074, 0x000000ff},
1381 	{0x00000075, 0x00143400},
1382 	{0x00000076, 0x08ec0800},
1383 	{0x00000077, 0x040000cc},
1384 	{0x00000079, 0x00000000},
1385 	{0x0000007a, 0x21000409},
1386 	{0x0000007c, 0x00000000},
1387 	{0x0000007d, 0xe8000000},
1388 	{0x0000007e, 0x044408a8},
1389 	{0x0000007f, 0x00000003},
1390 	{0x00000080, 0x00000000},
1391 	{0x00000081, 0x01000000},
1392 	{0x00000082, 0x02000000},
1393 	{0x00000083, 0x00000000},
1394 	{0x00000084, 0xe3f3e4f4},
1395 	{0x00000085, 0x00052024},
1396 	{0x00000087, 0x00000000},
1397 	{0x00000088, 0x66036603},
1398 	{0x00000089, 0x01000000},
1399 	{0x0000008b, 0x1c0a0000},
1400 	{0x0000008c, 0xff010000},
1401 	{0x0000008e, 0xffffefff},
1402 	{0x0000008f, 0xfff3efff},
1403 	{0x00000090, 0xfff3efbf},
1404 	{0x00000094, 0x00101101},
1405 	{0x00000095, 0x00000fff},
1406 	{0x00000096, 0x00116fff},
1407 	{0x00000097, 0x60010000},
1408 	{0x00000098, 0x10010000},
1409 	{0x00000099, 0x00006000},
1410 	{0x0000009a, 0x00001000},
1411 	{0x0000009f, 0x00a47400}
1412 };
1413 
1414 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1415 	{0x0000006f, 0x03044000},
1416 	{0x00000070, 0x0480c018},
1417 	{0x00000071, 0x00000040},
1418 	{0x00000072, 0x01000000},
1419 	{0x00000074, 0x000000ff},
1420 	{0x00000075, 0x00143400},
1421 	{0x00000076, 0x08ec0800},
1422 	{0x00000077, 0x040000cc},
1423 	{0x00000079, 0x00000000},
1424 	{0x0000007a, 0x21000409},
1425 	{0x0000007c, 0x00000000},
1426 	{0x0000007d, 0xe8000000},
1427 	{0x0000007e, 0x044408a8},
1428 	{0x0000007f, 0x00000003},
1429 	{0x00000080, 0x00000000},
1430 	{0x00000081, 0x01000000},
1431 	{0x00000082, 0x02000000},
1432 	{0x00000083, 0x00000000},
1433 	{0x00000084, 0xe3f3e4f4},
1434 	{0x00000085, 0x00052024},
1435 	{0x00000087, 0x00000000},
1436 	{0x00000088, 0x66036603},
1437 	{0x00000089, 0x01000000},
1438 	{0x0000008b, 0x1c0a0000},
1439 	{0x0000008c, 0xff010000},
1440 	{0x0000008e, 0xffffefff},
1441 	{0x0000008f, 0xfff3efff},
1442 	{0x00000090, 0xfff3efbf},
1443 	{0x00000094, 0x00101101},
1444 	{0x00000095, 0x00000fff},
1445 	{0x00000096, 0x00116fff},
1446 	{0x00000097, 0x60010000},
1447 	{0x00000098, 0x10010000},
1448 	{0x00000099, 0x00006000},
1449 	{0x0000009a, 0x00001000},
1450 	{0x0000009f, 0x00a37400}
1451 };
1452 
1453 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1454 	{0x0000006f, 0x03044000},
1455 	{0x00000070, 0x0480c018},
1456 	{0x00000071, 0x00000040},
1457 	{0x00000072, 0x01000000},
1458 	{0x00000074, 0x000000ff},
1459 	{0x00000075, 0x00143400},
1460 	{0x00000076, 0x08ec0800},
1461 	{0x00000077, 0x040000cc},
1462 	{0x00000079, 0x00000000},
1463 	{0x0000007a, 0x21000409},
1464 	{0x0000007c, 0x00000000},
1465 	{0x0000007d, 0xe8000000},
1466 	{0x0000007e, 0x044408a8},
1467 	{0x0000007f, 0x00000003},
1468 	{0x00000080, 0x00000000},
1469 	{0x00000081, 0x01000000},
1470 	{0x00000082, 0x02000000},
1471 	{0x00000083, 0x00000000},
1472 	{0x00000084, 0xe3f3e4f4},
1473 	{0x00000085, 0x00052024},
1474 	{0x00000087, 0x00000000},
1475 	{0x00000088, 0x66036603},
1476 	{0x00000089, 0x01000000},
1477 	{0x0000008b, 0x1c0a0000},
1478 	{0x0000008c, 0xff010000},
1479 	{0x0000008e, 0xffffefff},
1480 	{0x0000008f, 0xfff3efff},
1481 	{0x00000090, 0xfff3efbf},
1482 	{0x00000094, 0x00101101},
1483 	{0x00000095, 0x00000fff},
1484 	{0x00000096, 0x00116fff},
1485 	{0x00000097, 0x60010000},
1486 	{0x00000098, 0x10010000},
1487 	{0x00000099, 0x00006000},
1488 	{0x0000009a, 0x00001000},
1489 	{0x0000009f, 0x00a17730}
1490 };
1491 
1492 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1493 	{0x0000006f, 0x03044000},
1494 	{0x00000070, 0x0480c018},
1495 	{0x00000071, 0x00000040},
1496 	{0x00000072, 0x01000000},
1497 	{0x00000074, 0x000000ff},
1498 	{0x00000075, 0x00143400},
1499 	{0x00000076, 0x08ec0800},
1500 	{0x00000077, 0x040000cc},
1501 	{0x00000079, 0x00000000},
1502 	{0x0000007a, 0x21000409},
1503 	{0x0000007c, 0x00000000},
1504 	{0x0000007d, 0xe8000000},
1505 	{0x0000007e, 0x044408a8},
1506 	{0x0000007f, 0x00000003},
1507 	{0x00000080, 0x00000000},
1508 	{0x00000081, 0x01000000},
1509 	{0x00000082, 0x02000000},
1510 	{0x00000083, 0x00000000},
1511 	{0x00000084, 0xe3f3e4f4},
1512 	{0x00000085, 0x00052024},
1513 	{0x00000087, 0x00000000},
1514 	{0x00000088, 0x66036603},
1515 	{0x00000089, 0x01000000},
1516 	{0x0000008b, 0x1c0a0000},
1517 	{0x0000008c, 0xff010000},
1518 	{0x0000008e, 0xffffefff},
1519 	{0x0000008f, 0xfff3efff},
1520 	{0x00000090, 0xfff3efbf},
1521 	{0x00000094, 0x00101101},
1522 	{0x00000095, 0x00000fff},
1523 	{0x00000096, 0x00116fff},
1524 	{0x00000097, 0x60010000},
1525 	{0x00000098, 0x10010000},
1526 	{0x00000099, 0x00006000},
1527 	{0x0000009a, 0x00001000},
1528 	{0x0000009f, 0x00a07730}
1529 };
1530 
1531 /* ucode loading */
1532 int si_mc_load_microcode(struct radeon_device *rdev)
1533 {
1534 	const __be32 *fw_data = NULL;
1535 	const __le32 *new_fw_data = NULL;
1536 	u32 running, blackout = 0;
1537 	u32 *io_mc_regs = NULL;
1538 	const __le32 *new_io_mc_regs = NULL;
1539 	int i, regs_size, ucode_size;
1540 
1541 	if (!rdev->mc_fw)
1542 		return -EINVAL;
1543 
1544 	if (rdev->new_fw) {
1545 		const struct mc_firmware_header_v1_0 *hdr =
1546 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1547 
1548 		radeon_ucode_print_mc_hdr(&hdr->header);
1549 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1550 		new_io_mc_regs = (const __le32 *)
1551 			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1552 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1553 		new_fw_data = (const __le32 *)
1554 			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1555 	} else {
1556 		ucode_size = rdev->mc_fw->datasize / 4;
1557 
1558 		switch (rdev->family) {
1559 		case CHIP_TAHITI:
1560 			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1561 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1562 			break;
1563 		case CHIP_PITCAIRN:
1564 			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1565 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1566 			break;
1567 		case CHIP_VERDE:
1568 		default:
1569 			io_mc_regs = (u32 *)&verde_io_mc_regs;
1570 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1571 			break;
1572 		case CHIP_OLAND:
1573 			io_mc_regs = (u32 *)&oland_io_mc_regs;
1574 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1575 			break;
1576 		case CHIP_HAINAN:
1577 			io_mc_regs = (u32 *)&hainan_io_mc_regs;
1578 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1579 			break;
1580 		}
1581 		fw_data = (const __be32 *)rdev->mc_fw->data;
1582 	}
1583 
1584 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1585 
1586 	if (running == 0) {
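		/*
		 * Dead code: running is known to be zero in this branch,
		 * so the blackout save below (and the matching restore at
		 * the end of the function) never executes.
		 */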
1587 		if (running) {
1588 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1589 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1590 		}
1591 
1592 		/* reset the engine and set to writable */
1593 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1594 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1595 
1596 		/* load mc io regs */
1597 		for (i = 0; i < regs_size; i++) {
1598 			if (rdev->new_fw) {
1599 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1600 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1601 			} else {
1602 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1603 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1604 			}
1605 		}
1606 		/* load the MC ucode */
1607 		for (i = 0; i < ucode_size; i++) {
1608 			if (rdev->new_fw)
1609 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1610 			else
1611 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1612 		}
1613 
1614 		/* put the engine back into the active state */
1615 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1616 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1617 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1618 
1619 		/* wait for training to complete */
1620 		for (i = 0; i < rdev->usec_timeout; i++) {
1621 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1622 				break;
1623 			udelay(1);
1624 		}
1625 		for (i = 0; i < rdev->usec_timeout; i++) {
1626 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1627 				break;
1628 			udelay(1);
1629 		}
1630 
1631 		if (running)
1632 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1633 	}
1634 
1635 	return 0;
1636 }
1637 
1638 static int si_init_microcode(struct radeon_device *rdev)
1639 {
1640 	const char *chip_name;
1641 	const char *new_chip_name;
1642 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1643 	size_t smc_req_size, mc2_req_size;
1644 	char fw_name[30];
1645 	int err;
1646 	int new_fw = 0;
1647 
1648 	DRM_DEBUG("\n");
1649 
1650 	switch (rdev->family) {
1651 	case CHIP_TAHITI:
1652 		chip_name = "TAHITI";
1653 		new_chip_name = "tahiti";
1654 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1655 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1656 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1657 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1658 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1659 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1660 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1661 		break;
1662 	case CHIP_PITCAIRN:
1663 		chip_name = "PITCAIRN";
1664 		new_chip_name = "pitcairn";
1665 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1666 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1667 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1668 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1669 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1670 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1671 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1672 		break;
1673 	case CHIP_VERDE:
1674 		chip_name = "VERDE";
1675 		new_chip_name = "verde";
1676 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1677 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1678 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1679 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1680 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1681 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1682 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1683 		break;
1684 	case CHIP_OLAND:
1685 		chip_name = "OLAND";
1686 		new_chip_name = "oland";
1687 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1688 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1689 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1690 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1691 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1692 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1693 		break;
1694 	case CHIP_HAINAN:
1695 		chip_name = "HAINAN";
1696 		new_chip_name = "hainan";
1697 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1698 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1699 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1700 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1701 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1702 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1703 		break;
1704 	default: BUG();
1705 	}
1706 
1707 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1708 
1709 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
1710 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1711 	if (err) {
1712 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1713 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1714 		if (err)
1715 			goto out;
1716 		if (rdev->pfp_fw->datasize != pfp_req_size) {
1717 			printk(KERN_ERR
1718 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1719 			       rdev->pfp_fw->datasize, fw_name);
1720 			err = -EINVAL;
1721 			goto out;
1722 		}
1723 	} else {
1724 		err = radeon_ucode_validate(rdev->pfp_fw);
1725 		if (err) {
1726 			printk(KERN_ERR
1727 			       "si_cp: validation failed for firmware \"%s\"\n",
1728 			       fw_name);
1729 			goto out;
1730 		} else {
1731 			new_fw++;
1732 		}
1733 	}
1734 
1735 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
1736 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1737 	if (err) {
1738 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
1739 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1740 		if (err)
1741 			goto out;
1742 		if (rdev->me_fw->datasize != me_req_size) {
1743 			printk(KERN_ERR
1744 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1745 			       rdev->me_fw->datasize, fw_name);
1746 			err = -EINVAL;
1747 		}
1748 	} else {
1749 		err = radeon_ucode_validate(rdev->me_fw);
1750 		if (err) {
1751 			printk(KERN_ERR
1752 			       "si_cp: validation failed for firmware \"%s\"\n",
1753 			       fw_name);
1754 			goto out;
1755 		} else {
1756 			new_fw++;
1757 		}
1758 	}
1759 
1760 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
1761 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1762 	if (err) {
1763 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
1764 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1765 		if (err)
1766 			goto out;
1767 		if (rdev->ce_fw->datasize != ce_req_size) {
1768 			printk(KERN_ERR
1769 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1770 			       rdev->ce_fw->datasize, fw_name);
1771 			err = -EINVAL;
1772 		}
1773 	} else {
1774 		err = radeon_ucode_validate(rdev->ce_fw);
1775 		if (err) {
1776 			printk(KERN_ERR
1777 			       "si_cp: validation failed for firmware \"%s\"\n",
1778 			       fw_name);
1779 			goto out;
1780 		} else {
1781 			new_fw++;
1782 		}
1783 	}
1784 
1785 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
1786 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1787 	if (err) {
1788 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
1789 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1790 		if (err)
1791 			goto out;
1792 		if (rdev->rlc_fw->datasize != rlc_req_size) {
1793 			printk(KERN_ERR
1794 			       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1795 			       rdev->rlc_fw->datasize, fw_name);
1796 			err = -EINVAL;
1797 		}
1798 	} else {
1799 		err = radeon_ucode_validate(rdev->rlc_fw);
1800 		if (err) {
1801 			printk(KERN_ERR
1802 			       "si_cp: validation failed for firmware \"%s\"\n",
1803 			       fw_name);
1804 			goto out;
1805 		} else {
1806 			new_fw++;
1807 		}
1808 	}
1809 
1810 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
1811 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1812 	if (err) {
1813 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
1814 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1815 		if (err) {
1816 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
1817 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1818 			if (err)
1819 				goto out;
1820 		}
1821 		if ((rdev->mc_fw->datasize != mc_req_size) &&
1822 		    (rdev->mc_fw->datasize != mc2_req_size)) {
1823 			printk(KERN_ERR
1824 			       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1825 			       rdev->mc_fw->datasize, fw_name);
1826 			err = -EINVAL;
1827 		}
1828 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
1829 	} else {
1830 		err = radeon_ucode_validate(rdev->mc_fw);
1831 		if (err) {
1832 			printk(KERN_ERR
1833 			       "si_cp: validation failed for firmware \"%s\"\n",
1834 			       fw_name);
1835 			goto out;
1836 		} else {
1837 			new_fw++;
1838 		}
1839 	}
1840 
1841 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
1842 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1843 	if (err) {
1844 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
1845 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1846 		if (err) {
1847 			printk(KERN_ERR
1848 			       "smc: error loading firmware \"%s\"\n",
1849 			       fw_name);
1850 			release_firmware(rdev->smc_fw);
1851 			rdev->smc_fw = NULL;
1852 			err = 0;
1853 		} else if (rdev->smc_fw->datasize != smc_req_size) {
1854 			printk(KERN_ERR
1855 			       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1856 			       rdev->smc_fw->datasize, fw_name);
1857 			err = -EINVAL;
1858 		}
1859 	} else {
1860 		err = radeon_ucode_validate(rdev->smc_fw);
1861 		if (err) {
1862 			printk(KERN_ERR
1863 			       "si_cp: validation failed for firmware \"%s\"\n",
1864 			       fw_name);
1865 			goto out;
1866 		} else {
1867 			new_fw++;
1868 		}
1869 	}
1870 
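	/* rdev->new_fw is only set when all six images (pfp, me, ce, rlc,
	 * mc, smc) are in the new header format; mixing formats is rejected */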
1871 	if (new_fw == 0) {
1872 		rdev->new_fw = false;
1873 	} else if (new_fw < 6) {
1874 		printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1875 		err = -EINVAL;
1876 	} else {
1877 		rdev->new_fw = true;
1878 	}
1879 out:
1880 	if (err) {
1881 		if (err != -EINVAL)
1882 			printk(KERN_ERR
1883 			       "si_cp: Failed to load firmware \"%s\"\n",
1884 			       fw_name);
1885 		release_firmware(rdev->pfp_fw);
1886 		rdev->pfp_fw = NULL;
1887 		release_firmware(rdev->me_fw);
1888 		rdev->me_fw = NULL;
1889 		release_firmware(rdev->ce_fw);
1890 		rdev->ce_fw = NULL;
1891 		release_firmware(rdev->rlc_fw);
1892 		rdev->rlc_fw = NULL;
1893 		release_firmware(rdev->mc_fw);
1894 		rdev->mc_fw = NULL;
1895 		release_firmware(rdev->smc_fw);
1896 		rdev->smc_fw = NULL;
1897 	}
1898 	return err;
1899 }
1900 
1901 /**
1902  * si_fini_microcode - drop the firmware image references
1903  *
1904  * @rdev: radeon_device pointer
1905  *
1906  * Drop the pfp, me, ce, rlc, mc and smc firmware image references.
1907  * Called at driver shutdown.
1908  */
1909 static void si_fini_microcode(struct radeon_device *rdev)
1910 {
1911 	release_firmware(rdev->pfp_fw);
1912 	rdev->pfp_fw = NULL;
1913 	release_firmware(rdev->me_fw);
1914 	rdev->me_fw = NULL;
1915 	release_firmware(rdev->rlc_fw);
1916 	rdev->rlc_fw = NULL;
1917 	release_firmware(rdev->mc_fw);
1918 	rdev->mc_fw = NULL;
1919 	release_firmware(rdev->smc_fw);
1920 	rdev->smc_fw = NULL;
1921 	release_firmware(rdev->ce_fw);
1922 	rdev->ce_fw = NULL;
1923 }
1924 
1925 /* watermark setup */
1926 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1927 				   struct radeon_crtc *radeon_crtc,
1928 				   struct drm_display_mode *mode,
1929 				   struct drm_display_mode *other_mode)
1930 {
1931 	u32 tmp, buffer_alloc, i;
1932 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1933 	/*
1934 	 * Line Buffer Setup
1935 	 * There are 3 line buffers, each one shared by 2 display controllers.
1936 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1937 	 * the display controllers.  The partitioning is done via one of four
1938 	 * preset allocations specified in bits 21:20 (only two are used here):
1939 	 *  0 - half lb
1940 	 *  2 - whole lb, other crtc must be disabled
1941 	 */
1942 	/* this can get tricky if we have two large displays on a paired group
1943 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1944 	 * non-linked crtcs for maximum line buffer allocation.
1945 	 */
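	/* e.g. when both crtcs of a pair are active, each takes half a line
	 * buffer (tmp = 0, returning 4096 * 2 below); a lone crtc takes the
	 * whole buffer (tmp = 2, 8192 * 2) */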
1946 	if (radeon_crtc->base.enabled && mode) {
1947 		if (other_mode) {
1948 			tmp = 0; /* 1/2 */
1949 			buffer_alloc = 1;
1950 		} else {
1951 			tmp = 2; /* whole */
1952 			buffer_alloc = 2;
1953 		}
1954 	} else {
1955 		tmp = 0;
1956 		buffer_alloc = 0;
1957 	}
1958 
1959 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1960 	       DC_LB_MEMORY_CONFIG(tmp));
1961 
1962 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1963 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1964 	for (i = 0; i < rdev->usec_timeout; i++) {
1965 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1966 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1967 			break;
1968 		udelay(1);
1969 	}
1970 
1971 	if (radeon_crtc->base.enabled && mode) {
1972 		switch (tmp) {
1973 		case 0:
1974 		default:
1975 			return 4096 * 2;
1976 		case 2:
1977 			return 8192 * 2;
1978 		}
1979 	}
1980 
1981 	/* controller not enabled, so no lb used */
1982 	return 0;
1983 }
1984 
1985 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1986 {
1987 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1988 
1989 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1990 	case 0:
1991 	default:
1992 		return 1;
1993 	case 1:
1994 		return 2;
1995 	case 2:
1996 		return 4;
1997 	case 3:
1998 		return 8;
1999 	case 4:
2000 		return 3;
2001 	case 5:
2002 		return 6;
2003 	case 6:
2004 		return 10;
2005 	case 7:
2006 		return 12;
2007 	case 8:
2008 		return 16;
2009 	}
2010 }
2011 
2012 struct dce6_wm_params {
2013 	u32 dram_channels; /* number of dram channels */
2014 	u32 yclk;          /* bandwidth per dram data pin in kHz */
2015 	u32 sclk;          /* engine clock in kHz */
2016 	u32 disp_clk;      /* display clock in kHz */
2017 	u32 src_width;     /* viewport width */
2018 	u32 active_time;   /* active display time in ns */
2019 	u32 blank_time;    /* blank time in ns */
2020 	bool interlaced;    /* mode is interlaced */
2021 	fixed20_12 vsc;    /* vertical scale ratio */
2022 	u32 num_heads;     /* number of active crtcs */
2023 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2024 	u32 lb_size;       /* line buffer allocated to pipe */
2025 	u32 vtaps;         /* vertical scaler taps */
2026 };
2027 
2028 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2029 {
2030 	/* Calculate raw DRAM Bandwidth */
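	/* a sketch of the fixed-point math below, units assumed:
	 *   bandwidth = yclk / 1000 * (dram_channels * 4) * 0.7
	 * e.g. 2 channels at yclk = 50000 kHz -> 50 * 8 * 0.7 = 280
	 */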
2031 	fixed20_12 dram_efficiency; /* 0.7 */
2032 	fixed20_12 yclk, dram_channels, bandwidth;
2033 	fixed20_12 a;
2034 
2035 	a.full = dfixed_const(1000);
2036 	yclk.full = dfixed_const(wm->yclk);
2037 	yclk.full = dfixed_div(yclk, a);
2038 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2039 	a.full = dfixed_const(10);
2040 	dram_efficiency.full = dfixed_const(7);
2041 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2042 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2043 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2044 
2045 	return dfixed_trunc(bandwidth);
2046 }
2047 
2048 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2049 {
2050 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2051 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2052 	fixed20_12 yclk, dram_channels, bandwidth;
2053 	fixed20_12 a;
2054 
2055 	a.full = dfixed_const(1000);
2056 	yclk.full = dfixed_const(wm->yclk);
2057 	yclk.full = dfixed_div(yclk, a);
2058 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2059 	a.full = dfixed_const(10);
2060 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
2061 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2062 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2063 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2064 
2065 	return dfixed_trunc(bandwidth);
2066 }
2067 
2068 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2069 {
2070 	/* Calculate the display Data return Bandwidth */
2071 	fixed20_12 return_efficiency; /* 0.8 */
2072 	fixed20_12 sclk, bandwidth;
2073 	fixed20_12 a;
2074 
2075 	a.full = dfixed_const(1000);
2076 	sclk.full = dfixed_const(wm->sclk);
2077 	sclk.full = dfixed_div(sclk, a);
2078 	a.full = dfixed_const(10);
2079 	return_efficiency.full = dfixed_const(8);
2080 	return_efficiency.full = dfixed_div(return_efficiency, a);
2081 	a.full = dfixed_const(32);
2082 	bandwidth.full = dfixed_mul(a, sclk);
2083 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2084 
2085 	return dfixed_trunc(bandwidth);
2086 }
2087 
2088 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2089 {
2090 	return 32;
2091 }
2092 
2093 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2094 {
2095 	/* Calculate the DMIF Request Bandwidth */
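	/* a sketch, units assumed: 0.8 * min(disp_clk / 1000 * 16,
	 * sclk / 1000 * 32), i.e. half a 32-byte request per display clock
	 * vs. one per engine clock */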
2096 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2097 	fixed20_12 disp_clk, sclk, bandwidth;
2098 	fixed20_12 a, b1, b2;
2099 	u32 min_bandwidth;
2100 
2101 	a.full = dfixed_const(1000);
2102 	disp_clk.full = dfixed_const(wm->disp_clk);
2103 	disp_clk.full = dfixed_div(disp_clk, a);
2104 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2105 	b1.full = dfixed_mul(a, disp_clk);
2106 
2107 	a.full = dfixed_const(1000);
2108 	sclk.full = dfixed_const(wm->sclk);
2109 	sclk.full = dfixed_div(sclk, a);
2110 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2111 	b2.full = dfixed_mul(a, sclk);
2112 
2113 	a.full = dfixed_const(10);
2114 	disp_clk_request_efficiency.full = dfixed_const(8);
2115 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2116 
2117 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2118 
2119 	a.full = dfixed_const(min_bandwidth);
2120 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2121 
2122 	return dfixed_trunc(bandwidth);
2123 }
2124 
2125 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2126 {
2127 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
2128 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2129 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2130 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2131 
2132 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2133 }
2134 
2135 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2136 {
2137 	/* Calculate the display mode Average Bandwidth
2138 	 * DisplayMode should contain the source and destination dimensions,
2139 	 * timing, etc.
2140 	 */
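	/* a sketch, units assumed:
	 *   bandwidth = src_width * bytes_per_pixel * vsc / (line_time / 1000)
	 * e.g. 1920 px * 4 B over a ~14.8 us line is roughly 519
	 */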
2141 	fixed20_12 bpp;
2142 	fixed20_12 line_time;
2143 	fixed20_12 src_width;
2144 	fixed20_12 bandwidth;
2145 	fixed20_12 a;
2146 
2147 	a.full = dfixed_const(1000);
2148 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2149 	line_time.full = dfixed_div(line_time, a);
2150 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2151 	src_width.full = dfixed_const(wm->src_width);
2152 	bandwidth.full = dfixed_mul(src_width, bpp);
2153 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2154 	bandwidth.full = dfixed_div(bandwidth, line_time);
2155 
2156 	return dfixed_trunc(bandwidth);
2157 }
2158 
2159 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2160 {
2161 	/* First calculate the latency in ns */
2162 	u32 mc_latency = 2000; /* 2000 ns. */
2163 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2164 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2165 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2166 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2167 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2168 		(wm->num_heads * cursor_line_pair_return_time);
2169 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2170 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2171 	u32 tmp, dmif_size = 12288;
2172 	fixed20_12 a, b, c;
2173 
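	/* the worst-case latency sums the mc latency, the dc pipe latency
	 * and the time the other heads may hold the return path; if the
	 * line buffer cannot refill a line within the active period, the
	 * shortfall is added at the end of this function */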
2174 	if (wm->num_heads == 0)
2175 		return 0;
2176 
2177 	a.full = dfixed_const(2);
2178 	b.full = dfixed_const(1);
2179 	if ((wm->vsc.full > a.full) ||
2180 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2181 	    (wm->vtaps >= 5) ||
2182 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2183 		max_src_lines_per_dst_line = 4;
2184 	else
2185 		max_src_lines_per_dst_line = 2;
2186 
2187 	a.full = dfixed_const(available_bandwidth);
2188 	b.full = dfixed_const(wm->num_heads);
2189 	a.full = dfixed_div(a, b);
2190 
2191 	b.full = dfixed_const(mc_latency + 512);
2192 	c.full = dfixed_const(wm->disp_clk);
2193 	b.full = dfixed_div(b, c);
2194 
2195 	c.full = dfixed_const(dmif_size);
2196 	b.full = dfixed_div(c, b);
2197 
2198 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
2199 
2200 	b.full = dfixed_const(1000);
2201 	c.full = dfixed_const(wm->disp_clk);
2202 	b.full = dfixed_div(c, b);
2203 	c.full = dfixed_const(wm->bytes_per_pixel);
2204 	b.full = dfixed_mul(b, c);
2205 
2206 	lb_fill_bw = min(tmp, dfixed_trunc(b));
2207 
2208 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2209 	b.full = dfixed_const(1000);
2210 	c.full = dfixed_const(lb_fill_bw);
2211 	b.full = dfixed_div(c, b);
2212 	a.full = dfixed_div(a, b);
2213 	line_fill_time = dfixed_trunc(a);
2214 
2215 	if (line_fill_time < wm->active_time)
2216 		return latency;
2217 	else
2218 		return latency + (line_fill_time - wm->active_time);
2219 
2220 }
2221 
2222 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2223 {
2224 	if (dce6_average_bandwidth(wm) <=
2225 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2226 		return true;
2227 	else
2228 		return false;
2229 }
2230 
2231 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2232 {
2233 	if (dce6_average_bandwidth(wm) <=
2234 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2235 		return true;
2236 	else
2237 		return false;
2238 }
2239 
2240 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2241 {
2242 	u32 lb_partitions = wm->lb_size / wm->src_width;
2243 	u32 line_time = wm->active_time + wm->blank_time;
2244 	u32 latency_tolerant_lines;
2245 	u32 latency_hiding;
2246 	fixed20_12 a;
2247 
2248 	a.full = dfixed_const(1);
2249 	if (wm->vsc.full > a.full)
2250 		latency_tolerant_lines = 1;
2251 	else {
2252 		if (lb_partitions <= (wm->vtaps + 1))
2253 			latency_tolerant_lines = 1;
2254 		else
2255 			latency_tolerant_lines = 2;
2256 	}
2257 
2258 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2259 
2260 	if (dce6_latency_watermark(wm) <= latency_hiding)
2261 		return true;
2262 	else
2263 		return false;
2264 }
2265 
2266 static void dce6_program_watermarks(struct radeon_device *rdev,
2267 					 struct radeon_crtc *radeon_crtc,
2268 					 u32 lb_size, u32 num_heads)
2269 {
2270 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2271 	struct dce6_wm_params wm_low, wm_high;
2272 	u32 dram_channels;
2273 	u32 pixel_period;
2274 	u32 line_time = 0;
2275 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2276 	u32 priority_a_mark = 0, priority_b_mark = 0;
2277 	u32 priority_a_cnt = PRIORITY_OFF;
2278 	u32 priority_b_cnt = PRIORITY_OFF;
2279 	u32 tmp, arb_control3;
2280 	fixed20_12 a, b, c;
2281 
2282 	if (radeon_crtc->base.enabled && num_heads && mode) {
2283 		pixel_period = 1000000 / (u32)mode->clock;
2284 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2285 		priority_a_cnt = 0;
2286 		priority_b_cnt = 0;
2287 
2288 		if (rdev->family == CHIP_ARUBA)
2289 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2290 		else
2291 			dram_channels = si_get_number_of_dram_channels(rdev);
2292 
2293 		/* watermark for high clocks */
2294 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2295 			wm_high.yclk =
2296 				radeon_dpm_get_mclk(rdev, false) * 10;
2297 			wm_high.sclk =
2298 				radeon_dpm_get_sclk(rdev, false) * 10;
2299 		} else {
2300 			wm_high.yclk = rdev->pm.current_mclk * 10;
2301 			wm_high.sclk = rdev->pm.current_sclk * 10;
2302 		}
2303 
2304 		wm_high.disp_clk = mode->clock;
2305 		wm_high.src_width = mode->crtc_hdisplay;
2306 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2307 		wm_high.blank_time = line_time - wm_high.active_time;
2308 		wm_high.interlaced = false;
2309 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2310 			wm_high.interlaced = true;
2311 		wm_high.vsc = radeon_crtc->vsc;
2312 		wm_high.vtaps = 1;
2313 		if (radeon_crtc->rmx_type != RMX_OFF)
2314 			wm_high.vtaps = 2;
2315 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2316 		wm_high.lb_size = lb_size;
2317 		wm_high.dram_channels = dram_channels;
2318 		wm_high.num_heads = num_heads;
2319 
2320 		/* watermark for low clocks */
2321 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2322 			wm_low.yclk =
2323 				radeon_dpm_get_mclk(rdev, true) * 10;
2324 			wm_low.sclk =
2325 				radeon_dpm_get_sclk(rdev, true) * 10;
2326 		} else {
2327 			wm_low.yclk = rdev->pm.current_mclk * 10;
2328 			wm_low.sclk = rdev->pm.current_sclk * 10;
2329 		}
2330 
2331 		wm_low.disp_clk = mode->clock;
2332 		wm_low.src_width = mode->crtc_hdisplay;
2333 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2334 		wm_low.blank_time = line_time - wm_low.active_time;
2335 		wm_low.interlaced = false;
2336 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2337 			wm_low.interlaced = true;
2338 		wm_low.vsc = radeon_crtc->vsc;
2339 		wm_low.vtaps = 1;
2340 		if (radeon_crtc->rmx_type != RMX_OFF)
2341 			wm_low.vtaps = 2;
2342 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2343 		wm_low.lb_size = lb_size;
2344 		wm_low.dram_channels = dram_channels;
2345 		wm_low.num_heads = num_heads;
2346 
2347 		/* set for high clocks */
2348 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2349 		/* set for low clocks */
2350 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2351 
2352 		/* possibly force display priority to high */
2353 		/* should really do this at mode validation time... */
2354 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2355 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2356 		    !dce6_check_latency_hiding(&wm_high) ||
2357 		    (rdev->disp_priority == 2)) {
2358 			DRM_DEBUG_KMS("force priority to high\n");
2359 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2360 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2361 		}
2362 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2363 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2364 		    !dce6_check_latency_hiding(&wm_low) ||
2365 		    (rdev->disp_priority == 2)) {
2366 			DRM_DEBUG_KMS("force priority to high\n");
2367 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2368 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2369 		}
2370 
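		/* the marks below convert the latency watermarks (ns) into a
		 * pixel count at the current pixel clock, scaled by hsc and
		 * expressed in units of 16 pixels (register units assumed) */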
2371 		a.full = dfixed_const(1000);
2372 		b.full = dfixed_const(mode->clock);
2373 		b.full = dfixed_div(b, a);
2374 		c.full = dfixed_const(latency_watermark_a);
2375 		c.full = dfixed_mul(c, b);
2376 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2377 		c.full = dfixed_div(c, a);
2378 		a.full = dfixed_const(16);
2379 		c.full = dfixed_div(c, a);
2380 		priority_a_mark = dfixed_trunc(c);
2381 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2382 
2383 		a.full = dfixed_const(1000);
2384 		b.full = dfixed_const(mode->clock);
2385 		b.full = dfixed_div(b, a);
2386 		c.full = dfixed_const(latency_watermark_b);
2387 		c.full = dfixed_mul(c, b);
2388 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2389 		c.full = dfixed_div(c, a);
2390 		a.full = dfixed_const(16);
2391 		c.full = dfixed_div(c, a);
2392 		priority_b_mark = dfixed_trunc(c);
2393 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2394 
2395 		/* Save the number of lines the line buffer leads the scanout by */
2396 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2397 	}
2398 
2399 	/* select wm A */
2400 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2401 	tmp = arb_control3;
2402 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2403 	tmp |= LATENCY_WATERMARK_MASK(1);
2404 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2405 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2406 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2407 		LATENCY_HIGH_WATERMARK(line_time)));
2408 	/* select wm B */
2409 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2410 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2411 	tmp |= LATENCY_WATERMARK_MASK(2);
2412 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2413 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2414 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2415 		LATENCY_HIGH_WATERMARK(line_time)));
2416 	/* restore original selection */
2417 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2418 
2419 	/* write the priority marks */
2420 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2421 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2422 
2423 	/* save values for DPM */
2424 	radeon_crtc->line_time = line_time;
2425 	radeon_crtc->wm_high = latency_watermark_a;
2426 	radeon_crtc->wm_low = latency_watermark_b;
2427 }
2428 
2429 void dce6_bandwidth_update(struct radeon_device *rdev)
2430 {
2431 	struct drm_display_mode *mode0 = NULL;
2432 	struct drm_display_mode *mode1 = NULL;
2433 	u32 num_heads = 0, lb_size;
2434 	int i;
2435 
2436 	if (!rdev->mode_info.mode_config_initialized)
2437 		return;
2438 
2439 	radeon_update_display_priority(rdev);
2440 
2441 	for (i = 0; i < rdev->num_crtc; i++) {
2442 		if (rdev->mode_info.crtcs[i]->base.enabled)
2443 			num_heads++;
2444 	}
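	/* crtcs are walked in pairs since each line buffer is shared by two
	 * display controllers (see dce6_line_buffer_adjust) */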
2445 	for (i = 0; i < rdev->num_crtc; i += 2) {
2446 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2447 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2448 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2449 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2450 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2451 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2452 	}
2453 }
2454 
2455 /*
2456  * Core functions
2457  */
2458 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2459 {
2460 	u32 *tile = rdev->config.si.tile_mode_array;
2461 	const u32 num_tile_mode_states =
2462 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2463 	u32 reg_offset, split_equal_to_row_size;
2464 
2465 	switch (rdev->config.si.mem_row_size_in_kb) {
2466 	case 1:
2467 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2468 		break;
2469 	case 2:
2470 	default:
2471 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2472 		break;
2473 	case 4:
2474 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2475 		break;
2476 	}
2477 
2478 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2479 		tile[reg_offset] = 0;
2480 
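	/* only modes 0-17 and the PRT modes 21-25 are programmed below;
	 * entries 18-20 keep the zero written above */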
2481 	switch (rdev->family) {
2482 	case CHIP_TAHITI:
2483 	case CHIP_PITCAIRN:
2484 		/* non-AA compressed depth or any compressed stencil */
2485 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2486 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2487 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2488 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2489 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2490 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2492 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2493 		/* 2xAA/4xAA compressed depth only */
2494 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2495 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2496 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2497 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2498 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2499 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2500 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2501 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2502 		/* 8xAA compressed depth only */
2503 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2504 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2505 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2506 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2507 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2508 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2510 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2511 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2512 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2514 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2515 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2516 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2517 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2519 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2520 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2521 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2522 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2523 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2524 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2525 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2526 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2529 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2530 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2531 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2532 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2533 			   TILE_SPLIT(split_equal_to_row_size) |
2534 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2535 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2536 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2537 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2538 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2539 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2540 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2541 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2542 			   TILE_SPLIT(split_equal_to_row_size) |
2543 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2544 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2546 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2547 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2548 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2550 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2551 			   TILE_SPLIT(split_equal_to_row_size) |
2552 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2553 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2555 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2556 		/* 1D and 1D Array Surfaces */
2557 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2558 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2559 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2560 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2561 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2562 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2563 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2564 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2565 		/* Displayable maps. */
2566 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2567 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2568 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2569 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2570 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2571 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2572 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2573 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2574 		/* Display 8bpp. */
2575 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2576 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2577 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2578 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2579 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2580 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2581 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2582 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2583 		/* Display 16bpp. */
2584 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2585 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2586 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2587 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2588 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2589 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2590 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2591 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2592 		/* Display 32bpp. */
2593 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2595 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2596 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2597 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2598 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2600 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2601 		/* Thin. */
2602 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2603 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2604 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2605 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2606 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2607 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2609 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2610 		/* Thin 8 bpp. */
2611 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2612 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2613 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2614 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2615 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2616 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2618 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2619 		/* Thin 16 bpp. */
2620 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2621 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2622 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2623 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2624 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2625 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2626 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2627 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2628 		/* Thin 32 bpp. */
2629 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2630 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2631 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2632 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2633 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2634 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2636 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2637 		/* Thin 64 bpp. */
2638 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2639 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2640 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2641 			   TILE_SPLIT(split_equal_to_row_size) |
2642 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2643 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2646 		/* 8 bpp PRT. */
2647 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2648 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2649 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2650 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2651 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2652 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2653 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2654 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2655 		/* 16 bpp PRT */
2656 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2658 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2659 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2660 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2661 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2662 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2663 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2664 		/* 32 bpp PRT */
2665 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2667 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2668 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2669 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2670 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2672 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2673 		/* 64 bpp PRT */
2674 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2676 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2677 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2678 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2679 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2682 		/* 128 bpp PRT */
2683 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2684 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2685 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2686 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2687 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2688 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2689 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2690 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2691 
2692 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2693 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2694 		break;
2695 
2696 	case CHIP_VERDE:
2697 	case CHIP_OLAND:
2698 	case CHIP_HAINAN:
2699 		/* non-AA compressed depth or any compressed stencil */
2700 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2701 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2702 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2703 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2704 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2705 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2706 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2707 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2708 		/* 2xAA/4xAA compressed depth only */
2709 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2710 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2711 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2712 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2713 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2714 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2716 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2717 		/* 8xAA compressed depth only */
2718 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2719 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2720 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2721 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2722 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2723 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2724 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2725 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2726 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2727 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2728 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2729 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2731 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2732 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2734 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2735 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2736 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2737 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2738 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2740 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2741 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2742 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2743 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2744 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2745 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2747 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748 			   TILE_SPLIT(split_equal_to_row_size) |
2749 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2750 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2752 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2753 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2754 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2755 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2756 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2757 			   TILE_SPLIT(split_equal_to_row_size) |
2758 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2759 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2761 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2762 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2763 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2764 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2765 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2766 			   TILE_SPLIT(split_equal_to_row_size) |
2767 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2768 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2770 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2771 		/* 1D and 1D Array Surfaces */
2772 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2773 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2774 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2775 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2776 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2777 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2778 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2779 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2780 		/* Displayable maps. */
2781 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2782 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2783 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2784 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2785 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2786 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2788 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2789 		/* Display 8bpp. */
2790 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2792 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2793 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2794 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2795 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2796 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2797 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2798 		/* Display 16bpp. */
2799 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2800 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2801 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2803 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2804 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2806 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2807 		/* Display 32bpp. */
2808 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2810 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2811 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2812 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2813 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2815 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2816 		/* Thin. */
2817 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2818 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2819 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2820 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2821 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2822 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2824 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2825 		/* Thin 8 bpp. */
2826 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2827 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2828 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2829 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2830 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2831 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2833 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2834 		/* Thin 16 bpp. */
2835 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2836 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2837 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2838 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2839 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2840 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2842 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2843 		/* Thin 32 bpp. */
2844 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2845 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2846 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2847 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2848 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2849 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2852 		/* Thin 64 bpp. */
2853 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2854 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2855 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2856 			   TILE_SPLIT(split_equal_to_row_size) |
2857 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2858 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2859 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2860 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2861 		/* 8 bpp PRT. */
2862 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2863 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2864 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2865 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2866 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2867 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2868 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2869 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2870 		/* 16 bpp PRT */
2871 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2873 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2874 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2875 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2876 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2878 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2879 		/* 32 bpp PRT */
2880 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2882 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2883 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2884 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2885 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2886 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2887 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2888 		/* 64 bpp PRT */
2889 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2891 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2892 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2893 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2894 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2895 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2896 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2897 		/* 128 bpp PRT */
2898 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2899 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2900 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2901 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2902 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2903 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2904 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2905 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2906 
2907 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2908 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2909 		break;
2910 
2911 	default:
2912 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2913 	}
2914 }
2915 
2916 static void si_select_se_sh(struct radeon_device *rdev,
2917 			    u32 se_num, u32 sh_num)
2918 {
2919 	u32 data = INSTANCE_BROADCAST_WRITES;
2920 
2921 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2922 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2923 	else if (se_num == 0xffffffff)
2924 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2925 	else if (sh_num == 0xffffffff)
2926 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2927 	else
2928 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2929 	WREG32(GRBM_GFX_INDEX, data);
2930 }
2931 
2932 static u32 si_create_bitmask(u32 bit_width)
2933 {
2934 	u32 i, mask = 0;
2935 
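	/* builds a mask of bit_width ones, i.e. (1 << bit_width) - 1 for
	 * bit_width < 32 */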
2936 	for (i = 0; i < bit_width; i++) {
2937 		mask <<= 1;
2938 		mask |= 1;
2939 	}
2940 	return mask;
2941 }
2942 
2943 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2944 {
2945 	u32 data, mask;
2946 
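	/* merge the hardware (CC_*) and driver/user (GC_USER_*) inactive-CU
	 * fields, then invert within cu_per_sh bits to get the active CUs */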
2947 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2948 	if (data & 1)
2949 		data &= INACTIVE_CUS_MASK;
2950 	else
2951 		data = 0;
2952 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2953 
2954 	data >>= INACTIVE_CUS_SHIFT;
2955 
2956 	mask = si_create_bitmask(cu_per_sh);
2957 
2958 	return ~data & mask;
2959 }
2960 
2961 static void si_setup_spi(struct radeon_device *rdev,
2962 			 u32 se_num, u32 sh_per_se,
2963 			 u32 cu_per_sh)
2964 {
2965 	int i, j, k;
2966 	u32 data, mask, active_cu;
2967 
2968 	for (i = 0; i < se_num; i++) {
2969 		for (j = 0; j < sh_per_se; j++) {
2970 			si_select_se_sh(rdev, i, j);
2971 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2972 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2973 
2974 			/* find the first active CU and clear its bit */
2975 			for (k = 0; k < 16; k++) {
2976 				mask = 1 << k;
2977 				if (active_cu & mask) {
2978 					data &= ~mask;
2979 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2980 					break;
2981 				}
2982 			}
2983 		}
2984 	}
2985 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2986 }
2987 
2988 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2989 			      u32 max_rb_num_per_se,
2990 			      u32 sh_per_se)
2991 {
2992 	u32 data, mask;
2993 
2994 	data = RREG32(CC_RB_BACKEND_DISABLE);
2995 	if (data & 1)
2996 		data &= BACKEND_DISABLE_MASK;
2997 	else
2998 		data = 0;
2999 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3000 
3001 	data >>= BACKEND_DISABLE_SHIFT;
3002 
3003 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3004 
3005 	return data & mask;
3006 }
3007 
3008 static void si_setup_rb(struct radeon_device *rdev,
3009 			u32 se_num, u32 sh_per_se,
3010 			u32 max_rb_num_per_se)
3011 {
3012 	int i, j;
3013 	u32 data, mask;
3014 	u32 disabled_rbs = 0;
3015 	u32 enabled_rbs = 0;
3016 
3017 	for (i = 0; i < se_num; i++) {
3018 		for (j = 0; j < sh_per_se; j++) {
3019 			si_select_se_sh(rdev, i, j);
3020 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3021 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3022 		}
3023 	}
3024 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3025 
3026 	mask = 1;
3027 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3028 		if (!(disabled_rbs & mask))
3029 			enabled_rbs |= mask;
3030 		mask <<= 1;
3031 	}
3032 
3033 	rdev->config.si.backend_enable_mask = enabled_rbs;
3034 
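	/* pack a 2-bit RASTER_CONFIG map code per shader array, consuming
	 * enabled_rbs two bits at a time */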
3035 	for (i = 0; i < se_num; i++) {
3036 		si_select_se_sh(rdev, i, 0xffffffff);
3037 		data = 0;
3038 		for (j = 0; j < sh_per_se; j++) {
3039 			switch (enabled_rbs & 3) {
3040 			case 1:
3041 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3042 				break;
3043 			case 2:
3044 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3045 				break;
3046 			case 3:
3047 			default:
3048 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3049 				break;
3050 			}
3051 			enabled_rbs >>= 2;
3052 		}
3053 		WREG32(PA_SC_RASTER_CONFIG, data);
3054 	}
3055 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3056 }
3057 
3058 static void si_gpu_init(struct radeon_device *rdev)
3059 {
3060 	u32 gb_addr_config = 0;
3061 	u32 mc_shared_chmap, mc_arb_ramcfg;
3062 	u32 sx_debug_1;
3063 	u32 hdp_host_path_cntl;
3064 	u32 tmp;
3065 	int i, j;
3066 
3067 	switch (rdev->family) {
3068 	case CHIP_TAHITI:
3069 		rdev->config.si.max_shader_engines = 2;
3070 		rdev->config.si.max_tile_pipes = 12;
3071 		rdev->config.si.max_cu_per_sh = 8;
3072 		rdev->config.si.max_sh_per_se = 2;
3073 		rdev->config.si.max_backends_per_se = 4;
3074 		rdev->config.si.max_texture_channel_caches = 12;
3075 		rdev->config.si.max_gprs = 256;
3076 		rdev->config.si.max_gs_threads = 32;
3077 		rdev->config.si.max_hw_contexts = 8;
3078 
3079 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3080 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3081 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3082 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3083 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3084 		break;
3085 	case CHIP_PITCAIRN:
3086 		rdev->config.si.max_shader_engines = 2;
3087 		rdev->config.si.max_tile_pipes = 8;
3088 		rdev->config.si.max_cu_per_sh = 5;
3089 		rdev->config.si.max_sh_per_se = 2;
3090 		rdev->config.si.max_backends_per_se = 4;
3091 		rdev->config.si.max_texture_channel_caches = 8;
3092 		rdev->config.si.max_gprs = 256;
3093 		rdev->config.si.max_gs_threads = 32;
3094 		rdev->config.si.max_hw_contexts = 8;
3095 
3096 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3097 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3098 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3099 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3100 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3101 		break;
3102 	case CHIP_VERDE:
3103 	default:
3104 		rdev->config.si.max_shader_engines = 1;
3105 		rdev->config.si.max_tile_pipes = 4;
3106 		rdev->config.si.max_cu_per_sh = 5;
3107 		rdev->config.si.max_sh_per_se = 2;
3108 		rdev->config.si.max_backends_per_se = 4;
3109 		rdev->config.si.max_texture_channel_caches = 4;
3110 		rdev->config.si.max_gprs = 256;
3111 		rdev->config.si.max_gs_threads = 32;
3112 		rdev->config.si.max_hw_contexts = 8;
3113 
3114 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3115 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3116 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3117 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3118 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3119 		break;
3120 	case CHIP_OLAND:
3121 		rdev->config.si.max_shader_engines = 1;
3122 		rdev->config.si.max_tile_pipes = 4;
3123 		rdev->config.si.max_cu_per_sh = 6;
3124 		rdev->config.si.max_sh_per_se = 1;
3125 		rdev->config.si.max_backends_per_se = 2;
3126 		rdev->config.si.max_texture_channel_caches = 4;
3127 		rdev->config.si.max_gprs = 256;
3128 		rdev->config.si.max_gs_threads = 16;
3129 		rdev->config.si.max_hw_contexts = 8;
3130 
3131 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3132 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3133 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3134 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3135 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3136 		break;
3137 	case CHIP_HAINAN:
3138 		rdev->config.si.max_shader_engines = 1;
3139 		rdev->config.si.max_tile_pipes = 4;
3140 		rdev->config.si.max_cu_per_sh = 5;
3141 		rdev->config.si.max_sh_per_se = 1;
3142 		rdev->config.si.max_backends_per_se = 1;
3143 		rdev->config.si.max_texture_channel_caches = 2;
3144 		rdev->config.si.max_gprs = 256;
3145 		rdev->config.si.max_gs_threads = 16;
3146 		rdev->config.si.max_hw_contexts = 8;
3147 
3148 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3149 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3150 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3151 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3152 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3153 		break;
3154 	}
3155 
3156 	/* Initialize HDP */
3157 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3158 		WREG32((0x2c14 + j), 0x00000000);
3159 		WREG32((0x2c18 + j), 0x00000000);
3160 		WREG32((0x2c1c + j), 0x00000000);
3161 		WREG32((0x2c20 + j), 0x00000000);
3162 		WREG32((0x2c24 + j), 0x00000000);
3163 	}
3164 
3165 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3166 	WREG32(SRBM_INT_CNTL, 1);
3167 	WREG32(SRBM_INT_ACK, 1);
3168 
3169 	evergreen_fix_pci_max_read_req_size(rdev);
3170 
3171 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3172 
3173 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3174 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3175 
3176 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3177 	rdev->config.si.mem_max_burst_length_bytes = 256;
3178 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3179 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3180 	if (rdev->config.si.mem_row_size_in_kb > 4)
3181 		rdev->config.si.mem_row_size_in_kb = 4;
3182 	/* XXX use MC settings? */
3183 	rdev->config.si.shader_engine_tile_size = 32;
3184 	rdev->config.si.num_gpus = 1;
3185 	rdev->config.si.multi_gpu_tile_size = 64;
3186 
3187 	/* fix up row size */
3188 	gb_addr_config &= ~ROW_SIZE_MASK;
3189 	switch (rdev->config.si.mem_row_size_in_kb) {
3190 	case 1:
3191 	default:
3192 		gb_addr_config |= ROW_SIZE(0);
3193 		break;
3194 	case 2:
3195 		gb_addr_config |= ROW_SIZE(1);
3196 		break;
3197 	case 4:
3198 		gb_addr_config |= ROW_SIZE(2);
3199 		break;
3200 	}
3201 
3202 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3203 	 * not have bank info, so create a custom tiling dword.
3204 	 * bits 3:0   num_pipes
3205 	 * bits 7:4   num_banks
3206 	 * bits 11:8  group_size
3207 	 * bits 15:12 row_size
3208 	 */
3209 	rdev->config.si.tile_config = 0;
3210 	switch (rdev->config.si.num_tile_pipes) {
3211 	case 1:
3212 		rdev->config.si.tile_config |= (0 << 0);
3213 		break;
3214 	case 2:
3215 		rdev->config.si.tile_config |= (1 << 0);
3216 		break;
3217 	case 4:
3218 		rdev->config.si.tile_config |= (2 << 0);
3219 		break;
3220 	case 8:
3221 	default:
3222 		/* XXX what about 12? */
3223 		rdev->config.si.tile_config |= (3 << 0);
3224 		break;
3225 	}
3226 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3227 	case 0: /* four banks */
3228 		rdev->config.si.tile_config |= 0 << 4;
3229 		break;
3230 	case 1: /* eight banks */
3231 		rdev->config.si.tile_config |= 1 << 4;
3232 		break;
3233 	case 2: /* sixteen banks */
3234 	default:
3235 		rdev->config.si.tile_config |= 2 << 4;
3236 		break;
3237 	}
3238 	rdev->config.si.tile_config |=
3239 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3240 	rdev->config.si.tile_config |=
3241 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
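	/* Worked example (hypothetical Tahiti-class config): 8 pipes -> 3 in
	 * bits 3:0, 16 banks -> 2 in bits 7:4, a pipe interleave field of 0
	 * in bits 11:8 and a row size field of 2 in bits 15:12 would pack to
	 * tile_config = (3 << 0) | (2 << 4) | (0 << 8) | (2 << 12) = 0x2023.
	 */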
3242 
3243 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3244 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3245 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3246 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3247 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3248 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3249 	if (rdev->has_uvd) {
3250 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3251 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3252 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3253 	}
3254 
3255 	si_tiling_mode_table_init(rdev);
3256 
3257 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3258 		    rdev->config.si.max_sh_per_se,
3259 		    rdev->config.si.max_backends_per_se);
3260 
3261 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3262 		     rdev->config.si.max_sh_per_se,
3263 		     rdev->config.si.max_cu_per_sh);
3264 
3265 	rdev->config.si.active_cus = 0;
3266 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3267 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3268 			rdev->config.si.active_cus +=
3269 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3270 		}
3271 	}
3272 
3273 	/* set HW defaults for 3D engine */
3274 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3275 				     ROQ_IB2_START(0x2b)));
3276 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3277 
3278 	sx_debug_1 = RREG32(SX_DEBUG_1);
3279 	WREG32(SX_DEBUG_1, sx_debug_1);
3280 
3281 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3282 
3283 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3284 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3285 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3286 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3287 
3288 	WREG32(VGT_NUM_INSTANCES, 1);
3289 
3290 	WREG32(CP_PERFMON_CNTL, 0);
3291 
3292 	WREG32(SQ_CONFIG, 0);
3293 
3294 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3295 					  FORCE_EOV_MAX_REZ_CNT(255)));
3296 
3297 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3298 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3299 
3300 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3301 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3302 
3303 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3304 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3305 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3306 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3307 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3308 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3309 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3310 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3311 
3312 	tmp = RREG32(HDP_MISC_CNTL);
3313 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3314 	WREG32(HDP_MISC_CNTL, tmp);
3315 
3316 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3317 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3318 
3319 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3320 
3321 	udelay(50);
3322 }
3323 
3324 /*
3325  * GPU scratch registers helper functions.
3326  */
3327 static void si_scratch_init(struct radeon_device *rdev)
3328 {
3329 	int i;
3330 
3331 	rdev->scratch.num_reg = 7;
3332 	rdev->scratch.reg_base = SCRATCH_REG0;
3333 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3334 		rdev->scratch.free[i] = true;
3335 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3336 	}
3337 }
3338 
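/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Flushes the read caches over the GART, then emits an EVENT_WRITE_EOP
 * packet that writes the fence sequence number and raises an interrupt (SI).
 */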
3339 void si_fence_ring_emit(struct radeon_device *rdev,
3340 			struct radeon_fence *fence)
3341 {
3342 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3343 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3344 
3345 	/* flush read cache over gart */
3346 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3347 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3348 	radeon_ring_write(ring, 0);
3349 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3350 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3351 			  PACKET3_TC_ACTION_ENA |
3352 			  PACKET3_SH_KCACHE_ACTION_ENA |
3353 			  PACKET3_SH_ICACHE_ACTION_ENA);
3354 	radeon_ring_write(ring, 0xFFFFFFFF);
3355 	radeon_ring_write(ring, 0);
3356 	radeon_ring_write(ring, 10); /* poll interval */
3357 	/* EVENT_WRITE_EOP - flush caches, send int */
3358 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3359 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3360 	radeon_ring_write(ring, lower_32_bits(addr));
3361 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3362 	radeon_ring_write(ring, fence->seq);
3363 	radeon_ring_write(ring, 0);
3364 }
3365 
3366 /*
3367  * IB stuff
3368  */
3369 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3370 {
3371 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3372 	u32 header;
3373 
3374 	if (ib->is_const_ib) {
3375 		/* set switch buffer packet before const IB */
3376 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3377 		radeon_ring_write(ring, 0);
3378 
3379 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3380 	} else {
3381 		u32 next_rptr;
3382 		if (ring->rptr_save_reg) {
3383 			next_rptr = ring->wptr + 3 + 4 + 8;
3384 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3385 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3386 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3387 			radeon_ring_write(ring, next_rptr);
3388 		} else if (rdev->wb.enabled) {
3389 			next_rptr = ring->wptr + 5 + 4 + 8;
3390 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3391 			radeon_ring_write(ring, (1 << 8));
3392 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3393 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3394 			radeon_ring_write(ring, next_rptr);
3395 		}
3396 
3397 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3398 	}
3399 
3400 	radeon_ring_write(ring, header);
3401 	radeon_ring_write(ring,
3402 #ifdef __BIG_ENDIAN
3403 			  (2 << 0) |
3404 #endif
3405 			  (ib->gpu_addr & 0xFFFFFFFC));
3406 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3407 	radeon_ring_write(ring, ib->length_dw |
3408 			  (ib->vm ? (ib->vm->id << 24) : 0));
3409 
3410 	if (!ib->is_const_ib) {
3411 		/* flush read cache over gart for this vmid */
3412 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3413 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3414 		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
3415 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3416 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3417 				  PACKET3_TC_ACTION_ENA |
3418 				  PACKET3_SH_KCACHE_ACTION_ENA |
3419 				  PACKET3_SH_ICACHE_ACTION_ENA);
3420 		radeon_ring_write(ring, 0xFFFFFFFF);
3421 		radeon_ring_write(ring, 0);
3422 		radeon_ring_write(ring, 10); /* poll interval */
3423 	}
3424 }
3425 
3426 /*
3427  * CP.
3428  */
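/**
 * si_cp_enable - enable/disable the CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the ME, PFP and CE; on halt the three CP rings are
 * also marked as not ready (SI).
 */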
3429 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3430 {
3431 	if (enable)
3432 		WREG32(CP_ME_CNTL, 0);
3433 	else {
3434 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3435 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3436 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3437 		WREG32(SCRATCH_UMSK, 0);
3438 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3439 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3440 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3441 	}
3442 	udelay(50);
3443 }
3444 
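/**
 * si_cp_load_microcode - load the CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP and loads the PFP, CE and ME ucode images into the CP
 * ucode RAMs, handling both the new header-wrapped and the legacy raw
 * firmware layouts.
 * Returns 0 on success, -EINVAL if the firmware is not loaded.
 */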
3445 static int si_cp_load_microcode(struct radeon_device *rdev)
3446 {
3447 	int i;
3448 
3449 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3450 		return -EINVAL;
3451 
3452 	si_cp_enable(rdev, false);
3453 
3454 	if (rdev->new_fw) {
3455 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3456 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3457 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3458 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3459 		const struct gfx_firmware_header_v1_0 *me_hdr =
3460 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3461 		const __le32 *fw_data;
3462 		u32 fw_size;
3463 
3464 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3465 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3466 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3467 
3468 		/* PFP */
3469 		fw_data = (const __le32 *)
3470 			((const char *)rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3471 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3472 		WREG32(CP_PFP_UCODE_ADDR, 0);
3473 		for (i = 0; i < fw_size; i++)
3474 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3475 		WREG32(CP_PFP_UCODE_ADDR, 0);
3476 
3477 		/* CE */
3478 		fw_data = (const __le32 *)
3479 			((const char *)rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3480 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3481 		WREG32(CP_CE_UCODE_ADDR, 0);
3482 		for (i = 0; i < fw_size; i++)
3483 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3484 		WREG32(CP_CE_UCODE_ADDR, 0);
3485 
3486 		/* ME */
3487 		fw_data = (const __le32 *)
3488 			((const char *)rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3489 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3490 		WREG32(CP_ME_RAM_WADDR, 0);
3491 		for (i = 0; i < fw_size; i++)
3492 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3493 		WREG32(CP_ME_RAM_WADDR, 0);
3494 	} else {
3495 		const __be32 *fw_data;
3496 
3497 		/* PFP */
3498 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3499 		WREG32(CP_PFP_UCODE_ADDR, 0);
3500 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3501 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3502 		WREG32(CP_PFP_UCODE_ADDR, 0);
3503 
3504 		/* CE */
3505 		fw_data = (const __be32 *)rdev->ce_fw->data;
3506 		WREG32(CP_CE_UCODE_ADDR, 0);
3507 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3508 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3509 		WREG32(CP_CE_UCODE_ADDR, 0);
3510 
3511 		/* ME */
3512 		fw_data = (const __be32 *)rdev->me_fw->data;
3513 		WREG32(CP_ME_RAM_WADDR, 0);
3514 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3515 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3516 		WREG32(CP_ME_RAM_WADDR, 0);
3517 	}
3518 
3519 	WREG32(CP_PFP_UCODE_ADDR, 0);
3520 	WREG32(CP_CE_UCODE_ADDR, 0);
3521 	WREG32(CP_ME_RAM_WADDR, 0);
3522 	WREG32(CP_ME_RAM_RADDR, 0);
3523 	return 0;
3524 }
3525 
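/**
 * si_cp_start - initialize the CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Emits ME_INITIALIZE and the CE partition setup on the gfx ring,
 * enables the CP, loads the clear-state context and clears the compute
 * context state on the two compute rings.
 * Returns 0 on success, error code on failure.
 */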
3526 static int si_cp_start(struct radeon_device *rdev)
3527 {
3528 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3529 	int r, i;
3530 
3531 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3532 	if (r) {
3533 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3534 		return r;
3535 	}
3536 	/* init the CP */
3537 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3538 	radeon_ring_write(ring, 0x1);
3539 	radeon_ring_write(ring, 0x0);
3540 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3541 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3542 	radeon_ring_write(ring, 0);
3543 	radeon_ring_write(ring, 0);
3544 
3545 	/* init the CE partitions */
3546 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3547 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3548 	radeon_ring_write(ring, 0xc000);
3549 	radeon_ring_write(ring, 0xe000);
3550 	radeon_ring_unlock_commit(rdev, ring, false);
3551 
3552 	si_cp_enable(rdev, true);
3553 
3554 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3555 	if (r) {
3556 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3557 		return r;
3558 	}
3559 
3560 	/* setup clear context state */
3561 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3562 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3563 
3564 	for (i = 0; i < si_default_size; i++)
3565 		radeon_ring_write(ring, si_default_state[i]);
3566 
3567 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3568 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3569 
3570 	/* set clear context state */
3571 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3572 	radeon_ring_write(ring, 0);
3573 
3574 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3575 	radeon_ring_write(ring, 0x00000316);
3576 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3577 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3578 
3579 	radeon_ring_unlock_commit(rdev, ring, false);
3580 
3581 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3582 		ring = &rdev->ring[i];
3583 		r = radeon_ring_lock(rdev, ring, 2);
		if (r) {
			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
			return r;
		}
3584 
3585 		/* clear the compute context state */
3586 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3587 		radeon_ring_write(ring, 0);
3588 
3589 		radeon_ring_unlock_commit(rdev, ring, false);
3590 	}
3591 
3592 	return 0;
3593 }
3594 
3595 static void si_cp_fini(struct radeon_device *rdev)
3596 {
3597 	struct radeon_ring *ring;
3598 	si_cp_enable(rdev, false);
3599 
3600 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3601 	radeon_ring_fini(rdev, ring);
3602 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3603 
3604 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3605 	radeon_ring_fini(rdev, ring);
3606 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3607 
3608 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3609 	radeon_ring_fini(rdev, ring);
3610 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3611 }
3612 
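/**
 * si_cp_resume - set up and start the CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Programs the ring buffer size, read/write pointers and writeback
 * addresses for the gfx ring and both compute rings, then starts the
 * rings and runs a ring test on each.
 * Returns 0 on success, error code on failure.
 */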
3613 static int si_cp_resume(struct radeon_device *rdev)
3614 {
3615 	struct radeon_ring *ring;
3616 	u32 tmp;
3617 	u32 rb_bufsz;
3618 	int r;
3619 
3620 	si_enable_gui_idle_interrupt(rdev, false);
3621 
3622 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3623 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3624 
3625 	/* Set the write pointer delay */
3626 	WREG32(CP_RB_WPTR_DELAY, 0);
3627 
3628 	WREG32(CP_DEBUG, 0);
3629 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3630 
3631 	/* ring 0 - compute and gfx */
3632 	/* Set ring buffer size */
3633 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3634 	rb_bufsz = order_base_2(ring->ring_size / 8);
3635 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
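	/* e.g. for a hypothetical 1 MiB ring: rb_bufsz = order_base_2(1M / 8)
	 * = 17, the rptr block size is order_base_2(4096 / 8) = 9 shifted
	 * left by 8, so tmp = (9 << 8) | 17 = 0x911.
	 */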
3636 #ifdef __BIG_ENDIAN
3637 	tmp |= BUF_SWAP_32BIT;
3638 #endif
3639 	WREG32(CP_RB0_CNTL, tmp);
3640 
3641 	/* Initialize the ring buffer's read and write pointers */
3642 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3643 	ring->wptr = 0;
3644 	WREG32(CP_RB0_WPTR, ring->wptr);
3645 
3646 	/* set the wb address whether it's enabled or not */
3647 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3648 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3649 
3650 	if (rdev->wb.enabled)
3651 		WREG32(SCRATCH_UMSK, 0xff);
3652 	else {
3653 		tmp |= RB_NO_UPDATE;
3654 		WREG32(SCRATCH_UMSK, 0);
3655 	}
3656 
3657 	mdelay(1);
3658 	WREG32(CP_RB0_CNTL, tmp);
3659 
3660 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3661 
3662 	/* ring1  - compute only */
3663 	/* Set ring buffer size */
3664 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3665 	rb_bufsz = order_base_2(ring->ring_size / 8);
3666 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3667 #ifdef __BIG_ENDIAN
3668 	tmp |= BUF_SWAP_32BIT;
3669 #endif
3670 	WREG32(CP_RB1_CNTL, tmp);
3671 
3672 	/* Initialize the ring buffer's read and write pointers */
3673 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3674 	ring->wptr = 0;
3675 	WREG32(CP_RB1_WPTR, ring->wptr);
3676 
3677 	/* set the wb address whether it's enabled or not */
3678 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3679 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3680 
3681 	mdelay(1);
3682 	WREG32(CP_RB1_CNTL, tmp);
3683 
3684 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3685 
3686 	/* ring2 - compute only */
3687 	/* Set ring buffer size */
3688 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3689 	rb_bufsz = order_base_2(ring->ring_size / 8);
3690 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3691 #ifdef __BIG_ENDIAN
3692 	tmp |= BUF_SWAP_32BIT;
3693 #endif
3694 	WREG32(CP_RB2_CNTL, tmp);
3695 
3696 	/* Initialize the ring buffer's read and write pointers */
3697 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3698 	ring->wptr = 0;
3699 	WREG32(CP_RB2_WPTR, ring->wptr);
3700 
3701 	/* set the wb address whether it's enabled or not */
3702 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3703 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3704 
3705 	mdelay(1);
3706 	WREG32(CP_RB2_CNTL, tmp);
3707 
3708 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3709 
3710 	/* start the rings */
3711 	si_cp_start(rdev);
3712 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3713 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3714 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3715 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3716 	if (r) {
3717 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3718 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3719 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3720 		return r;
3721 	}
3722 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3723 	if (r) {
3724 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3725 	}
3726 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3727 	if (r) {
3728 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3729 	}
3730 
3731 	si_enable_gui_idle_interrupt(rdev, true);
3732 
3733 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3734 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3735 
3736 	return 0;
3737 }
3738 
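/**
 * si_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Reads the GRBM, SRBM, DMA and VM status registers and builds a mask
 * of the blocks (GFX, CP, RLC, DMA, IH, SEM, VMC, display) that appear
 * to be hung.
 * Returns the reset mask; a busy MC is reported but masked out.
 */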
3739 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3740 {
3741 	u32 reset_mask = 0;
3742 	u32 tmp;
3743 
3744 	/* GRBM_STATUS */
3745 	tmp = RREG32(GRBM_STATUS);
3746 	if (tmp & (PA_BUSY | SC_BUSY |
3747 		   BCI_BUSY | SX_BUSY |
3748 		   TA_BUSY | VGT_BUSY |
3749 		   DB_BUSY | CB_BUSY |
3750 		   GDS_BUSY | SPI_BUSY |
3751 		   IA_BUSY | IA_BUSY_NO_DMA))
3752 		reset_mask |= RADEON_RESET_GFX;
3753 
3754 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3755 		   CP_BUSY | CP_COHERENCY_BUSY))
3756 		reset_mask |= RADEON_RESET_CP;
3757 
3758 	if (tmp & GRBM_EE_BUSY)
3759 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3760 
3761 	/* GRBM_STATUS2 */
3762 	tmp = RREG32(GRBM_STATUS2);
3763 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3764 		reset_mask |= RADEON_RESET_RLC;
3765 
3766 	/* DMA_STATUS_REG 0 */
3767 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3768 	if (!(tmp & DMA_IDLE))
3769 		reset_mask |= RADEON_RESET_DMA;
3770 
3771 	/* DMA_STATUS_REG 1 */
3772 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3773 	if (!(tmp & DMA_IDLE))
3774 		reset_mask |= RADEON_RESET_DMA1;
3775 
3776 	/* SRBM_STATUS2 */
3777 	tmp = RREG32(SRBM_STATUS2);
3778 	if (tmp & DMA_BUSY)
3779 		reset_mask |= RADEON_RESET_DMA;
3780 
3781 	if (tmp & DMA1_BUSY)
3782 		reset_mask |= RADEON_RESET_DMA1;
3783 
3784 	/* SRBM_STATUS */
3785 	tmp = RREG32(SRBM_STATUS);
3786 
3787 	if (tmp & IH_BUSY)
3788 		reset_mask |= RADEON_RESET_IH;
3789 
3790 	if (tmp & SEM_BUSY)
3791 		reset_mask |= RADEON_RESET_SEM;
3792 
3793 	if (tmp & GRBM_RQ_PENDING)
3794 		reset_mask |= RADEON_RESET_GRBM;
3795 
3796 	if (tmp & VMC_BUSY)
3797 		reset_mask |= RADEON_RESET_VMC;
3798 
3799 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3800 		   MCC_BUSY | MCD_BUSY))
3801 		reset_mask |= RADEON_RESET_MC;
3802 
3803 	if (evergreen_is_display_hung(rdev))
3804 		reset_mask |= RADEON_RESET_DISPLAY;
3805 
3806 	/* VM_L2_STATUS */
3807 	tmp = RREG32(VM_L2_STATUS);
3808 	if (tmp & L2_BUSY)
3809 		reset_mask |= RADEON_RESET_VMC;
3810 
3811 	/* Skip MC reset as it's most likely not hung, just busy */
3812 	if (reset_mask & RADEON_RESET_MC) {
3813 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3814 		reset_mask &= ~RADEON_RESET_MC;
3815 	}
3816 
3817 	return reset_mask;
3818 }
3819 
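/**
 * si_gpu_soft_reset - soft reset the blocks in the reset mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of blocks to reset
 *
 * Stops the RLC, CP and DMA engines, quiesces the MC, then pulses the
 * matching GRBM/SRBM soft reset bits and restores the MC state (SI).
 */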
3820 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3821 {
3822 	struct evergreen_mc_save save;
3823 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3824 	u32 tmp;
3825 
3826 	if (reset_mask == 0)
3827 		return;
3828 
3829 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3830 
3831 	evergreen_print_gpu_status_regs(rdev);
3832 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3833 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3834 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3835 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3836 
3837 	/* disable PG/CG */
3838 	si_fini_pg(rdev);
3839 	si_fini_cg(rdev);
3840 
3841 	/* stop the rlc */
3842 	si_rlc_stop(rdev);
3843 
3844 	/* Disable CP parsing/prefetching */
3845 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3846 
3847 	if (reset_mask & RADEON_RESET_DMA) {
3848 		/* dma0 */
3849 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3850 		tmp &= ~DMA_RB_ENABLE;
3851 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3852 	}
3853 	if (reset_mask & RADEON_RESET_DMA1) {
3854 		/* dma1 */
3855 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3856 		tmp &= ~DMA_RB_ENABLE;
3857 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3858 	}
3859 
3860 	udelay(50);
3861 
3862 	evergreen_mc_stop(rdev, &save);
3863 	if (evergreen_mc_wait_for_idle(rdev)) {
3864 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3865 	}
3866 
3867 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3868 		grbm_soft_reset = SOFT_RESET_CB |
3869 			SOFT_RESET_DB |
3870 			SOFT_RESET_GDS |
3871 			SOFT_RESET_PA |
3872 			SOFT_RESET_SC |
3873 			SOFT_RESET_BCI |
3874 			SOFT_RESET_SPI |
3875 			SOFT_RESET_SX |
3876 			SOFT_RESET_TC |
3877 			SOFT_RESET_TA |
3878 			SOFT_RESET_VGT |
3879 			SOFT_RESET_IA;
3880 	}
3881 
3882 	if (reset_mask & RADEON_RESET_CP) {
3883 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3884 
3885 		srbm_soft_reset |= SOFT_RESET_GRBM;
3886 	}
3887 
3888 	if (reset_mask & RADEON_RESET_DMA)
3889 		srbm_soft_reset |= SOFT_RESET_DMA;
3890 
3891 	if (reset_mask & RADEON_RESET_DMA1)
3892 		srbm_soft_reset |= SOFT_RESET_DMA1;
3893 
3894 	if (reset_mask & RADEON_RESET_DISPLAY)
3895 		srbm_soft_reset |= SOFT_RESET_DC;
3896 
3897 	if (reset_mask & RADEON_RESET_RLC)
3898 		grbm_soft_reset |= SOFT_RESET_RLC;
3899 
3900 	if (reset_mask & RADEON_RESET_SEM)
3901 		srbm_soft_reset |= SOFT_RESET_SEM;
3902 
3903 	if (reset_mask & RADEON_RESET_IH)
3904 		srbm_soft_reset |= SOFT_RESET_IH;
3905 
3906 	if (reset_mask & RADEON_RESET_GRBM)
3907 		srbm_soft_reset |= SOFT_RESET_GRBM;
3908 
3909 	if (reset_mask & RADEON_RESET_VMC)
3910 		srbm_soft_reset |= SOFT_RESET_VMC;
3911 
3912 	if (reset_mask & RADEON_RESET_MC)
3913 		srbm_soft_reset |= SOFT_RESET_MC;
3914 
3915 	if (grbm_soft_reset) {
3916 		tmp = RREG32(GRBM_SOFT_RESET);
3917 		tmp |= grbm_soft_reset;
3918 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3919 		WREG32(GRBM_SOFT_RESET, tmp);
3920 		tmp = RREG32(GRBM_SOFT_RESET);
3921 
3922 		udelay(50);
3923 
3924 		tmp &= ~grbm_soft_reset;
3925 		WREG32(GRBM_SOFT_RESET, tmp);
3926 		tmp = RREG32(GRBM_SOFT_RESET);
3927 	}
3928 
3929 	if (srbm_soft_reset) {
3930 		tmp = RREG32(SRBM_SOFT_RESET);
3931 		tmp |= srbm_soft_reset;
3932 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3933 		WREG32(SRBM_SOFT_RESET, tmp);
3934 		tmp = RREG32(SRBM_SOFT_RESET);
3935 
3936 		udelay(50);
3937 
3938 		tmp &= ~srbm_soft_reset;
3939 		WREG32(SRBM_SOFT_RESET, tmp);
3940 		tmp = RREG32(SRBM_SOFT_RESET);
3941 	}
3942 
3943 	/* Wait a little for things to settle down */
3944 	udelay(50);
3945 
3946 	evergreen_mc_resume(rdev, &save);
3947 	udelay(50);
3948 
3949 	evergreen_print_gpu_status_regs(rdev);
3950 }
3951 
3952 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3953 {
3954 	u32 tmp, i;
3955 
3956 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3957 	tmp |= SPLL_BYPASS_EN;
3958 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3959 
3960 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3961 	tmp |= SPLL_CTLREQ_CHG;
3962 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3963 
3964 	for (i = 0; i < rdev->usec_timeout; i++) {
3965 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3966 			break;
3967 		udelay(1);
3968 	}
3969 
3970 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3971 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3972 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3973 
3974 	tmp = RREG32(MPLL_CNTL_MODE);
3975 	tmp &= ~MPLL_MCLK_SEL;
3976 	WREG32(MPLL_CNTL_MODE, tmp);
3977 }
3978 
3979 static void si_spll_powerdown(struct radeon_device *rdev)
3980 {
3981 	u32 tmp;
3982 
3983 	tmp = RREG32(SPLL_CNTL_MODE);
3984 	tmp |= SPLL_SW_DIR_CONTROL;
3985 	WREG32(SPLL_CNTL_MODE, tmp);
3986 
3987 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3988 	tmp |= SPLL_RESET;
3989 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3990 
3991 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3992 	tmp |= SPLL_SLEEP;
3993 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3994 
3995 	tmp = RREG32(SPLL_CNTL_MODE);
3996 	tmp &= ~SPLL_SW_DIR_CONTROL;
3997 	WREG32(SPLL_CNTL_MODE, tmp);
3998 }
3999 
4000 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4001 {
4002 	struct evergreen_mc_save save;
4003 	u32 tmp, i;
4004 
4005 	dev_info(rdev->dev, "GPU pci config reset\n");
4006 
4007 	/* disable dpm? */
4008 
4009 	/* disable cg/pg */
4010 	si_fini_pg(rdev);
4011 	si_fini_cg(rdev);
4012 
4013 	/* Disable CP parsing/prefetching */
4014 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4015 	/* dma0 */
4016 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4017 	tmp &= ~DMA_RB_ENABLE;
4018 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4019 	/* dma1 */
4020 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4021 	tmp &= ~DMA_RB_ENABLE;
4022 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4023 	/* XXX other engines? */
4024 
4025 	/* halt the rlc, disable cp internal ints */
4026 	si_rlc_stop(rdev);
4027 
4028 	udelay(50);
4029 
4030 	/* disable mem access */
4031 	evergreen_mc_stop(rdev, &save);
4032 	if (evergreen_mc_wait_for_idle(rdev)) {
4033 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4034 	}
4035 
4036 	/* set mclk/sclk to bypass */
4037 	si_set_clk_bypass_mode(rdev);
4038 	/* powerdown spll */
4039 	si_spll_powerdown(rdev);
4040 	/* disable BM */
4041 	pci_disable_busmaster(rdev->pdev->dev.bsddev);
4042 	/* reset */
4043 	radeon_pci_config_reset(rdev);
4044 	/* wait for asic to come out of reset */
4045 	for (i = 0; i < rdev->usec_timeout; i++) {
4046 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4047 			break;
4048 		udelay(1);
4049 	}
4050 }
4051 
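/**
 * si_asic_reset - attempt to reset the asic
 *
 * @rdev: radeon_device pointer
 * @hard: force a PCI config reset instead of a soft reset
 *
 * Tries a soft reset first; if blocks are still hung afterwards and
 * hard resets are allowed, falls back to a PCI config reset.
 * Returns 0.
 */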
4052 int si_asic_reset(struct radeon_device *rdev, bool hard)
4053 {
4054 	u32 reset_mask;
4055 
4056 	if (hard) {
4057 		si_gpu_pci_config_reset(rdev);
4058 		return 0;
4059 	}
4060 
4061 	reset_mask = si_gpu_check_soft_reset(rdev);
4062 
4063 	if (reset_mask)
4064 		r600_set_bios_scratch_engine_hung(rdev, true);
4065 
4066 	/* try soft reset */
4067 	si_gpu_soft_reset(rdev, reset_mask);
4068 
4069 	reset_mask = si_gpu_check_soft_reset(rdev);
4070 
4071 	/* try pci config reset */
4072 	if (reset_mask && radeon_hard_reset)
4073 		si_gpu_pci_config_reset(rdev);
4074 
4075 	reset_mask = si_gpu_check_soft_reset(rdev);
4076 
4077 	if (!reset_mask)
4078 		r600_set_bios_scratch_engine_hung(rdev, false);
4079 
4080 	return 0;
4081 }
4082 
4083 /**
4084  * si_gfx_is_lockup - Check if the GFX engine is locked up
4085  *
4086  * @rdev: radeon_device pointer
4087  * @ring: radeon_ring structure holding ring information
4088  *
4089  * Check if the GFX engine is locked up.
4090  * Returns true if the engine appears to be locked up, false if not.
4091  */
4092 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4093 {
4094 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4095 
4096 	if (!(reset_mask & (RADEON_RESET_GFX |
4097 			    RADEON_RESET_COMPUTE |
4098 			    RADEON_RESET_CP))) {
4099 		radeon_ring_lockup_update(rdev, ring);
4100 		return false;
4101 	}
4102 	return radeon_ring_test_lockup(rdev, ring);
4103 }
4104 
4105 /* MC */
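/**
 * si_mc_program - program the memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Stops MC access, programs the system aperture, FB location and HDP
 * non-surface registers, then resumes the MC and disables the VGA
 * renderer (SI).
 */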
4106 static void si_mc_program(struct radeon_device *rdev)
4107 {
4108 	struct evergreen_mc_save save;
4109 	u32 tmp;
4110 	int i, j;
4111 
4112 	/* Initialize HDP */
4113 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4114 		WREG32((0x2c14 + j), 0x00000000);
4115 		WREG32((0x2c18 + j), 0x00000000);
4116 		WREG32((0x2c1c + j), 0x00000000);
4117 		WREG32((0x2c20 + j), 0x00000000);
4118 		WREG32((0x2c24 + j), 0x00000000);
4119 	}
4120 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4121 
4122 	evergreen_mc_stop(rdev, &save);
4123 	if (radeon_mc_wait_for_idle(rdev)) {
4124 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4125 	}
4126 	if (!ASIC_IS_NODCE(rdev))
4127 		/* Lockout access through VGA aperture*/
4128 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4129 	/* Update configuration */
4130 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4131 	       rdev->mc.vram_start >> 12);
4132 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4133 	       rdev->mc.vram_end >> 12);
4134 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4135 	       rdev->vram_scratch.gpu_addr >> 12);
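	/* FB base/top are programmed in 16 MiB units; e.g. a hypothetical
	 * 2 GiB of VRAM starting at 0 gives vram_end = 0x7FFFFFFF and thus
	 * tmp = (0x7F << 16) | 0x00 = 0x007F0000.
	 */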
4136 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4137 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4138 	WREG32(MC_VM_FB_LOCATION, tmp);
4139 	/* XXX double check these! */
4140 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4141 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4142 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4143 	WREG32(MC_VM_AGP_BASE, 0);
4144 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4145 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4146 	if (radeon_mc_wait_for_idle(rdev)) {
4147 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4148 	}
4149 	evergreen_mc_resume(rdev, &save);
4150 	if (!ASIC_IS_NODCE(rdev)) {
4151 		/* we need to own VRAM, so turn off the VGA renderer here
4152 		 * to stop it overwriting our objects */
4153 		rv515_vga_render_disable(rdev);
4154 	}
4155 }
4156 
4157 void si_vram_gtt_location(struct radeon_device *rdev,
4158 			  struct radeon_mc *mc)
4159 {
4160 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4161 		/* leave room for at least 1024M GTT */
4162 		dev_warn(rdev->dev, "limiting VRAM\n");
4163 		mc->real_vram_size = 0xFFC0000000ULL;
4164 		mc->mc_vram_size = 0xFFC0000000ULL;
4165 	}
4166 	radeon_vram_location(rdev, &rdev->mc, 0);
4167 	rdev->mc.gtt_base_align = 0;
4168 	radeon_gtt_location(rdev, mc);
4169 }
4170 
4171 static int si_mc_init(struct radeon_device *rdev)
4172 {
4173 	u32 tmp;
4174 	int chansize, numchan;
4175 
4176 	/* Get VRAM information */
4177 	rdev->mc.vram_is_ddr = true;
4178 	tmp = RREG32(MC_ARB_RAMCFG);
4179 	if (tmp & CHANSIZE_OVERRIDE) {
4180 		chansize = 16;
4181 	} else if (tmp & CHANSIZE_MASK) {
4182 		chansize = 64;
4183 	} else {
4184 		chansize = 32;
4185 	}
4186 	tmp = RREG32(MC_SHARED_CHMAP);
4187 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4188 	case 0:
4189 	default:
4190 		numchan = 1;
4191 		break;
4192 	case 1:
4193 		numchan = 2;
4194 		break;
4195 	case 2:
4196 		numchan = 4;
4197 		break;
4198 	case 3:
4199 		numchan = 8;
4200 		break;
4201 	case 4:
4202 		numchan = 3;
4203 		break;
4204 	case 5:
4205 		numchan = 6;
4206 		break;
4207 	case 6:
4208 		numchan = 10;
4209 		break;
4210 	case 7:
4211 		numchan = 12;
4212 		break;
4213 	case 8:
4214 		numchan = 16;
4215 		break;
4216 	}
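	/* e.g. a hypothetical board with 8 channels of 32-bit chips reports
	 * a 256-bit memory interface below */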
4217 	rdev->mc.vram_width = numchan * chansize;
4218 	/* Could the aperture size report 0? */
4219 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4220 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4221 	/* size in MB on si */
4222 	tmp = RREG32(CONFIG_MEMSIZE);
4223 	/* some boards may have garbage in the upper 16 bits */
4224 	if (tmp & 0xffff0000) {
4225 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4226 		if (tmp & 0xffff)
4227 			tmp &= 0xffff;
4228 	}
4229 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4230 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4231 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4232 	si_vram_gtt_location(rdev, &rdev->mc);
4233 	radeon_update_bandwidth_info(rdev);
4234 
4235 	return 0;
4236 }
4237 
4238 /*
4239  * GART
4240  */
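/**
 * si_pcie_gart_tlb_flush - flush the GART TLB
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache and then requests an invalidate of VM context 0
 * via VM_INVALIDATE_REQUEST (SI).
 */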
4241 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4242 {
4243 	/* flush hdp cache */
4244 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4245 
4246 	/* bits 0-15 are the VM contexts0-15 */
4247 	WREG32(VM_INVALIDATE_REQUEST, 1);
4248 }
4249 
4250 static int si_pcie_gart_enable(struct radeon_device *rdev)
4251 {
4252 	int r, i;
4253 
4254 	if (rdev->gart.robj == NULL) {
4255 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4256 		return -EINVAL;
4257 	}
4258 	r = radeon_gart_table_vram_pin(rdev);
4259 	if (r)
4260 		return r;
4261 	/* Setup TLB control */
4262 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4263 	       (0xA << 7) |
4264 	       ENABLE_L1_TLB |
4265 	       ENABLE_L1_FRAGMENT_PROCESSING |
4266 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4267 	       ENABLE_ADVANCED_DRIVER_MODEL |
4268 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4269 	/* Setup L2 cache */
4270 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4271 	       ENABLE_L2_FRAGMENT_PROCESSING |
4272 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4273 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4274 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4275 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4276 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4277 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4278 	       BANK_SELECT(4) |
4279 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4280 	/* setup context0 */
4281 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4282 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4283 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4284 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4285 			(u32)(rdev->dummy_page.addr >> 12));
4286 	WREG32(VM_CONTEXT0_CNTL2, 0);
4287 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4288 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4289 
4290 	WREG32(0x15D4, 0);
4291 	WREG32(0x15D8, 0);
4292 	WREG32(0x15DC, 0);
4293 
4294 	/* empty context1-15 */
4295 	/* set vm size, must be a multiple of 4 */
4296 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4297 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4298 
4299 	/* Assign the pt base to something valid for now; the pts used for
4300 	 * the VMs are determined by the application and set up and assigned
4301 	 * on the fly in the vm part of radeon_gart.c
4302 	 */
4303 	for (i = 1; i < 16; i++) {
4304 		if (i < 8)
4305 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4306 			       rdev->vm_manager.saved_table_addr[i]);
4307 		else
4308 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4309 			       rdev->vm_manager.saved_table_addr[i]);
4310 	}
4311 
4312 	/* enable context1-15 */
4313 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4314 	       (u32)(rdev->dummy_page.addr >> 12));
4315 	WREG32(VM_CONTEXT1_CNTL2, 4);
4316 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4317 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4318 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4319 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4320 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4321 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4322 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4323 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4324 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4325 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4326 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4327 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4328 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4329 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4330 
4331 	si_pcie_gart_tlb_flush(rdev);
4332 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4333 		 (unsigned)(rdev->mc.gtt_size >> 20),
4334 		 (unsigned long long)rdev->gart.table_addr);
4335 	rdev->gart.ready = true;
4336 	return 0;
4337 }
4338 
4339 static void si_pcie_gart_disable(struct radeon_device *rdev)
4340 {
4341 	unsigned i;
4342 
4343 	for (i = 1; i < 16; ++i) {
4344 		uint32_t reg;
4345 		if (i < 8)
4346 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4347 		else
4348 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4349 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4350 	}
4351 
4352 	/* Disable all tables */
4353 	WREG32(VM_CONTEXT0_CNTL, 0);
4354 	WREG32(VM_CONTEXT1_CNTL, 0);
4355 	/* Setup TLB control */
4356 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4357 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4358 	/* Setup L2 cache */
4359 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4360 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4361 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4362 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4363 	WREG32(VM_L2_CNTL2, 0);
4364 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4365 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4366 	radeon_gart_table_vram_unpin(rdev);
4367 }
4368 
4369 static void si_pcie_gart_fini(struct radeon_device *rdev)
4370 {
4371 	si_pcie_gart_disable(rdev);
4372 	radeon_gart_table_vram_free(rdev);
4373 	radeon_gart_fini(rdev);
4374 }
4375 
4376 /* vm parser */
4377 static bool si_vm_reg_valid(u32 reg)
4378 {
4379 	/* context regs are fine */
4380 	if (reg >= 0x28000)
4381 		return true;
4382 
4383 	/* shader regs are also fine */
4384 	if (reg >= 0xB000 && reg < 0xC000)
4385 		return true;
4386 
4387 	/* check config regs */
4388 	switch (reg) {
4389 	case GRBM_GFX_INDEX:
4390 	case CP_STRMOUT_CNTL:
4391 	case VGT_VTX_VECT_EJECT_REG:
4392 	case VGT_CACHE_INVALIDATION:
4393 	case VGT_ESGS_RING_SIZE:
4394 	case VGT_GSVS_RING_SIZE:
4395 	case VGT_GS_VERTEX_REUSE:
4396 	case VGT_PRIMITIVE_TYPE:
4397 	case VGT_INDEX_TYPE:
4398 	case VGT_NUM_INDICES:
4399 	case VGT_NUM_INSTANCES:
4400 	case VGT_TF_RING_SIZE:
4401 	case VGT_HS_OFFCHIP_PARAM:
4402 	case VGT_TF_MEMORY_BASE:
4403 	case PA_CL_ENHANCE:
4404 	case PA_SU_LINE_STIPPLE_VALUE:
4405 	case PA_SC_LINE_STIPPLE_STATE:
4406 	case PA_SC_ENHANCE:
4407 	case SQC_CACHES:
4408 	case SPI_STATIC_THREAD_MGMT_1:
4409 	case SPI_STATIC_THREAD_MGMT_2:
4410 	case SPI_STATIC_THREAD_MGMT_3:
4411 	case SPI_PS_MAX_WAVE_ID:
4412 	case SPI_CONFIG_CNTL:
4413 	case SPI_CONFIG_CNTL_1:
4414 	case TA_CNTL_AUX:
4415 		return true;
4416 	default:
4417 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4418 		return false;
4419 	}
4420 }
4421 
4422 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4423 				  u32 *ib, struct radeon_cs_packet *pkt)
4424 {
4425 	switch (pkt->opcode) {
4426 	case PACKET3_NOP:
4427 	case PACKET3_SET_BASE:
4428 	case PACKET3_SET_CE_DE_COUNTERS:
4429 	case PACKET3_LOAD_CONST_RAM:
4430 	case PACKET3_WRITE_CONST_RAM:
4431 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4432 	case PACKET3_DUMP_CONST_RAM:
4433 	case PACKET3_INCREMENT_CE_COUNTER:
4434 	case PACKET3_WAIT_ON_DE_COUNTER:
4435 	case PACKET3_CE_WRITE:
4436 		break;
4437 	default:
4438 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4439 		return -EINVAL;
4440 	}
4441 	return 0;
4442 }
4443 
4444 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4445 {
4446 	u32 start_reg, reg, i;
4447 	u32 command = ib[idx + 4];
4448 	u32 info = ib[idx + 1];
4449 	u32 idx_value = ib[idx];
4450 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4451 		/* src address space is register */
4452 		if (((info & 0x60000000) >> 29) == 0) {
4453 			start_reg = idx_value << 2;
4454 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4455 				reg = start_reg;
4456 				if (!si_vm_reg_valid(reg)) {
4457 					DRM_ERROR("CP DMA Bad SRC register\n");
4458 					return -EINVAL;
4459 				}
4460 			} else {
4461 				for (i = 0; i < (command & 0x1fffff); i++) {
4462 					reg = start_reg + (4 * i);
4463 					if (!si_vm_reg_valid(reg)) {
4464 						DRM_ERROR("CP DMA Bad SRC register\n");
4465 						return -EINVAL;
4466 					}
4467 				}
4468 			}
4469 		}
4470 	}
4471 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4472 		/* dst address space is register */
4473 		if (((info & 0x00300000) >> 20) == 0) {
4474 			start_reg = ib[idx + 2];
4475 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4476 				reg = start_reg;
4477 				if (!si_vm_reg_valid(reg)) {
4478 					DRM_ERROR("CP DMA Bad DST register\n");
4479 					return -EINVAL;
4480 				}
4481 			} else {
4482 				for (i = 0; i < (command & 0x1fffff); i++) {
4483 					reg = start_reg + (4 * i);
4484 					if (!si_vm_reg_valid(reg)) {
4485 						DRM_ERROR("CP DMA Bad DST register\n");
4486 						return -EINVAL;
4487 					}
4488 				}
4489 			}
4490 		}
4491 	}
4492 	return 0;
4493 }
4494 
4495 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4496 				   u32 *ib, struct radeon_cs_packet *pkt)
4497 {
4498 	int r;
4499 	u32 idx = pkt->idx + 1;
4500 	u32 idx_value = ib[idx];
4501 	u32 start_reg, end_reg, reg, i;
4502 
4503 	switch (pkt->opcode) {
4504 	case PACKET3_NOP:
4505 	case PACKET3_SET_BASE:
4506 	case PACKET3_CLEAR_STATE:
4507 	case PACKET3_INDEX_BUFFER_SIZE:
4508 	case PACKET3_DISPATCH_DIRECT:
4509 	case PACKET3_DISPATCH_INDIRECT:
4510 	case PACKET3_ALLOC_GDS:
4511 	case PACKET3_WRITE_GDS_RAM:
4512 	case PACKET3_ATOMIC_GDS:
4513 	case PACKET3_ATOMIC:
4514 	case PACKET3_OCCLUSION_QUERY:
4515 	case PACKET3_SET_PREDICATION:
4516 	case PACKET3_COND_EXEC:
4517 	case PACKET3_PRED_EXEC:
4518 	case PACKET3_DRAW_INDIRECT:
4519 	case PACKET3_DRAW_INDEX_INDIRECT:
4520 	case PACKET3_INDEX_BASE:
4521 	case PACKET3_DRAW_INDEX_2:
4522 	case PACKET3_CONTEXT_CONTROL:
4523 	case PACKET3_INDEX_TYPE:
4524 	case PACKET3_DRAW_INDIRECT_MULTI:
4525 	case PACKET3_DRAW_INDEX_AUTO:
4526 	case PACKET3_DRAW_INDEX_IMMD:
4527 	case PACKET3_NUM_INSTANCES:
4528 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4529 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4530 	case PACKET3_DRAW_INDEX_OFFSET_2:
4531 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4532 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4533 	case PACKET3_MPEG_INDEX:
4534 	case PACKET3_WAIT_REG_MEM:
4535 	case PACKET3_MEM_WRITE:
4536 	case PACKET3_PFP_SYNC_ME:
4537 	case PACKET3_SURFACE_SYNC:
4538 	case PACKET3_EVENT_WRITE:
4539 	case PACKET3_EVENT_WRITE_EOP:
4540 	case PACKET3_EVENT_WRITE_EOS:
4541 	case PACKET3_SET_CONTEXT_REG:
4542 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4543 	case PACKET3_SET_SH_REG:
4544 	case PACKET3_SET_SH_REG_OFFSET:
4545 	case PACKET3_INCREMENT_DE_COUNTER:
4546 	case PACKET3_WAIT_ON_CE_COUNTER:
4547 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4548 	case PACKET3_ME_WRITE:
4549 		break;
4550 	case PACKET3_COPY_DATA:
4551 		if ((idx_value & 0xf00) == 0) {
4552 			reg = ib[idx + 3] * 4;
4553 			if (!si_vm_reg_valid(reg))
4554 				return -EINVAL;
4555 		}
4556 		break;
4557 	case PACKET3_WRITE_DATA:
4558 		if ((idx_value & 0xf00) == 0) {
4559 			start_reg = ib[idx + 1] * 4;
4560 			if (idx_value & 0x10000) {
4561 				if (!si_vm_reg_valid(start_reg))
4562 					return -EINVAL;
4563 			} else {
4564 				for (i = 0; i < (pkt->count - 2); i++) {
4565 					reg = start_reg + (4 * i);
4566 					if (!si_vm_reg_valid(reg))
4567 						return -EINVAL;
4568 				}
4569 			}
4570 		}
4571 		break;
4572 	case PACKET3_COND_WRITE:
4573 		if (idx_value & 0x100) {
4574 			reg = ib[idx + 5] * 4;
4575 			if (!si_vm_reg_valid(reg))
4576 				return -EINVAL;
4577 		}
4578 		break;
4579 	case PACKET3_COPY_DW:
4580 		if (idx_value & 0x2) {
4581 			reg = ib[idx + 3] * 4;
4582 			if (!si_vm_reg_valid(reg))
4583 				return -EINVAL;
4584 		}
4585 		break;
4586 	case PACKET3_SET_CONFIG_REG:
4587 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4588 		end_reg = 4 * pkt->count + start_reg - 4;
4589 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4590 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4591 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4592 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4593 			return -EINVAL;
4594 		}
4595 		for (i = 0; i < pkt->count; i++) {
4596 			reg = start_reg + (4 * i);
4597 			if (!si_vm_reg_valid(reg))
4598 				return -EINVAL;
4599 		}
4600 		break;
4601 	case PACKET3_CP_DMA:
4602 		r = si_vm_packet3_cp_dma_check(ib, idx);
4603 		if (r)
4604 			return r;
4605 		break;
4606 	default:
4607 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4608 		return -EINVAL;
4609 	}
4610 	return 0;
4611 }
4612 
4613 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4614 				       u32 *ib, struct radeon_cs_packet *pkt)
4615 {
4616 	int r;
4617 	u32 idx = pkt->idx + 1;
4618 	u32 idx_value = ib[idx];
4619 	u32 start_reg, reg, i;
4620 
4621 	switch (pkt->opcode) {
4622 	case PACKET3_NOP:
4623 	case PACKET3_SET_BASE:
4624 	case PACKET3_CLEAR_STATE:
4625 	case PACKET3_DISPATCH_DIRECT:
4626 	case PACKET3_DISPATCH_INDIRECT:
4627 	case PACKET3_ALLOC_GDS:
4628 	case PACKET3_WRITE_GDS_RAM:
4629 	case PACKET3_ATOMIC_GDS:
4630 	case PACKET3_ATOMIC:
4631 	case PACKET3_OCCLUSION_QUERY:
4632 	case PACKET3_SET_PREDICATION:
4633 	case PACKET3_COND_EXEC:
4634 	case PACKET3_PRED_EXEC:
4635 	case PACKET3_CONTEXT_CONTROL:
4636 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4637 	case PACKET3_WAIT_REG_MEM:
4638 	case PACKET3_MEM_WRITE:
4639 	case PACKET3_PFP_SYNC_ME:
4640 	case PACKET3_SURFACE_SYNC:
4641 	case PACKET3_EVENT_WRITE:
4642 	case PACKET3_EVENT_WRITE_EOP:
4643 	case PACKET3_EVENT_WRITE_EOS:
4644 	case PACKET3_SET_CONTEXT_REG:
4645 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4646 	case PACKET3_SET_SH_REG:
4647 	case PACKET3_SET_SH_REG_OFFSET:
4648 	case PACKET3_INCREMENT_DE_COUNTER:
4649 	case PACKET3_WAIT_ON_CE_COUNTER:
4650 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4651 	case PACKET3_ME_WRITE:
4652 		break;
4653 	case PACKET3_COPY_DATA:
4654 		if ((idx_value & 0xf00) == 0) {
4655 			reg = ib[idx + 3] * 4;
4656 			if (!si_vm_reg_valid(reg))
4657 				return -EINVAL;
4658 		}
4659 		break;
4660 	case PACKET3_WRITE_DATA:
4661 		if ((idx_value & 0xf00) == 0) {
4662 			start_reg = ib[idx + 1] * 4;
4663 			if (idx_value & 0x10000) {
4664 				if (!si_vm_reg_valid(start_reg))
4665 					return -EINVAL;
4666 			} else {
4667 				for (i = 0; i < (pkt->count - 2); i++) {
4668 					reg = start_reg + (4 * i);
4669 					if (!si_vm_reg_valid(reg))
4670 						return -EINVAL;
4671 				}
4672 			}
4673 		}
4674 		break;
4675 	case PACKET3_COND_WRITE:
4676 		if (idx_value & 0x100) {
4677 			reg = ib[idx + 5] * 4;
4678 			if (!si_vm_reg_valid(reg))
4679 				return -EINVAL;
4680 		}
4681 		break;
4682 	case PACKET3_COPY_DW:
4683 		if (idx_value & 0x2) {
4684 			reg = ib[idx + 3] * 4;
4685 			if (!si_vm_reg_valid(reg))
4686 				return -EINVAL;
4687 		}
4688 		break;
4689 	case PACKET3_CP_DMA:
4690 		r = si_vm_packet3_cp_dma_check(ib, idx);
4691 		if (r)
4692 			return r;
4693 		break;
4694 	default:
4695 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4696 		return -EINVAL;
4697 	}
4698 	return 0;
4699 }
4700 
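/**
 * si_ib_parse - validate an IB for a VM-enabled ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon_ib pointer
 *
 * Walks the IB packet by packet and validates each packet3 against the
 * CE, gfx or compute checker; packet0 is rejected outright.
 * Returns 0 if the IB is valid, -EINVAL otherwise (the offending IB is
 * dumped to the log).
 */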
4701 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4702 {
4703 	int ret = 0;
4704 	u32 idx = 0, i;
4705 	struct radeon_cs_packet pkt;
4706 
4707 	do {
4708 		pkt.idx = idx;
4709 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4710 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4711 		pkt.one_reg_wr = 0;
4712 		switch (pkt.type) {
4713 		case RADEON_PACKET_TYPE0:
4714 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4715 			ret = -EINVAL;
4716 			break;
4717 		case RADEON_PACKET_TYPE2:
4718 			idx += 1;
4719 			break;
4720 		case RADEON_PACKET_TYPE3:
4721 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4722 			if (ib->is_const_ib)
4723 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4724 			else {
4725 				switch (ib->ring) {
4726 				case RADEON_RING_TYPE_GFX_INDEX:
4727 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4728 					break;
4729 				case CAYMAN_RING_TYPE_CP1_INDEX:
4730 				case CAYMAN_RING_TYPE_CP2_INDEX:
4731 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4732 					break;
4733 				default:
4734 					dev_err(rdev->dev, "Non-PM4 ring %d!\n", ib->ring);
4735 					ret = -EINVAL;
4736 					break;
4737 				}
4738 			}
4739 			idx += pkt.count + 2;
4740 			break;
4741 		default:
4742 			dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
4743 			ret = -EINVAL;
4744 			break;
4745 		}
4746 		if (ret) {
4747 			for (i = 0; i < ib->length_dw; i++) {
4748 				if (i == idx)
4749 					printk("\t0x%08x <---\n", ib->ptr[i]);
4750 				else
4751 					printk("\t0x%08x\n", ib->ptr[i]);
4752 			}
4753 			break;
4754 		}
4755 	} while (idx < ib->length_dw);
4756 
4757 	return ret;
4758 }
4759 
4760 /*
4761  * vm
4762  */
4763 int si_vm_init(struct radeon_device *rdev)
4764 {
4765 	/* number of VMs */
4766 	rdev->vm_manager.nvm = 16;
4767 	/* base offset of vram pages */
4768 	rdev->vm_manager.vram_base_offset = 0;
4769 
4770 	return 0;
4771 }
4772 
4773 void si_vm_fini(struct radeon_device *rdev)
4774 {
4775 }
4776 
4777 /**
4778  * si_vm_decode_fault - print human readable fault info
4779  *
4780  * @rdev: radeon_device pointer
4781  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4782  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4783  *
4784  * Print human readable fault information (SI).
4785  */
4786 static void si_vm_decode_fault(struct radeon_device *rdev,
4787 			       u32 status, u32 addr)
4788 {
4789 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4790 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4791 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4792 	char *block;
4793 
4794 	if (rdev->family == CHIP_TAHITI) {
4795 		switch (mc_id) {
4796 		case 160:
4797 		case 144:
4798 		case 96:
4799 		case 80:
4800 		case 224:
4801 		case 208:
4802 		case 32:
4803 		case 16:
4804 			block = "CB";
4805 			break;
4806 		case 161:
4807 		case 145:
4808 		case 97:
4809 		case 81:
4810 		case 225:
4811 		case 209:
4812 		case 33:
4813 		case 17:
4814 			block = "CB_FMASK";
4815 			break;
4816 		case 162:
4817 		case 146:
4818 		case 98:
4819 		case 82:
4820 		case 226:
4821 		case 210:
4822 		case 34:
4823 		case 18:
4824 			block = "CB_CMASK";
4825 			break;
4826 		case 163:
4827 		case 147:
4828 		case 99:
4829 		case 83:
4830 		case 227:
4831 		case 211:
4832 		case 35:
4833 		case 19:
4834 			block = "CB_IMMED";
4835 			break;
4836 		case 164:
4837 		case 148:
4838 		case 100:
4839 		case 84:
4840 		case 228:
4841 		case 212:
4842 		case 36:
4843 		case 20:
4844 			block = "DB";
4845 			break;
4846 		case 165:
4847 		case 149:
4848 		case 101:
4849 		case 85:
4850 		case 229:
4851 		case 213:
4852 		case 37:
4853 		case 21:
4854 			block = "DB_HTILE";
4855 			break;
4856 		case 167:
4857 		case 151:
4858 		case 103:
4859 		case 87:
4860 		case 231:
4861 		case 215:
4862 		case 39:
4863 		case 23:
4864 			block = "DB_STEN";
4865 			break;
4866 		case 72:
4867 		case 68:
4868 		case 64:
4869 		case 8:
4870 		case 4:
4871 		case 0:
4872 		case 136:
4873 		case 132:
4874 		case 128:
4875 		case 200:
4876 		case 196:
4877 		case 192:
4878 			block = "TC";
4879 			break;
4880 		case 112:
4881 		case 48:
4882 			block = "CP";
4883 			break;
4884 		case 49:
4885 		case 177:
4886 		case 50:
4887 		case 178:
4888 			block = "SH";
4889 			break;
4890 		case 53:
4891 		case 190:
4892 			block = "VGT";
4893 			break;
4894 		case 117:
4895 			block = "IH";
4896 			break;
4897 		case 51:
4898 		case 115:
4899 			block = "RLC";
4900 			break;
4901 		case 119:
4902 		case 183:
4903 			block = "DMA0";
4904 			break;
4905 		case 61:
4906 			block = "DMA1";
4907 			break;
4908 		case 248:
4909 		case 120:
4910 			block = "HDP";
4911 			break;
4912 		default:
4913 			block = "unknown";
4914 			break;
4915 		}
4916 	} else {
4917 		switch (mc_id) {
4918 		case 32:
4919 		case 16:
4920 		case 96:
4921 		case 80:
4922 		case 160:
4923 		case 144:
4924 		case 224:
4925 		case 208:
4926 			block = "CB";
4927 			break;
4928 		case 33:
4929 		case 17:
4930 		case 97:
4931 		case 81:
4932 		case 161:
4933 		case 145:
4934 		case 225:
4935 		case 209:
4936 			block = "CB_FMASK";
4937 			break;
4938 		case 34:
4939 		case 18:
4940 		case 98:
4941 		case 82:
4942 		case 162:
4943 		case 146:
4944 		case 226:
4945 		case 210:
4946 			block = "CB_CMASK";
4947 			break;
4948 		case 35:
4949 		case 19:
4950 		case 99:
4951 		case 83:
4952 		case 163:
4953 		case 147:
4954 		case 227:
4955 		case 211:
4956 			block = "CB_IMMED";
4957 			break;
4958 		case 36:
4959 		case 20:
4960 		case 100:
4961 		case 84:
4962 		case 164:
4963 		case 148:
4964 		case 228:
4965 		case 212:
4966 			block = "DB";
4967 			break;
4968 		case 37:
4969 		case 21:
4970 		case 101:
4971 		case 85:
4972 		case 165:
4973 		case 149:
4974 		case 229:
4975 		case 213:
4976 			block = "DB_HTILE";
4977 			break;
4978 		case 39:
4979 		case 23:
4980 		case 103:
4981 		case 87:
4982 		case 167:
4983 		case 151:
4984 		case 231:
4985 		case 215:
4986 			block = "DB_STEN";
4987 			break;
4988 		case 72:
4989 		case 68:
4990 		case 8:
4991 		case 4:
4992 		case 136:
4993 		case 132:
4994 		case 200:
4995 		case 196:
4996 			block = "TC";
4997 			break;
4998 		case 112:
4999 		case 48:
5000 			block = "CP";
5001 			break;
5002 		case 49:
5003 		case 177:
5004 		case 50:
5005 		case 178:
5006 			block = "SH";
5007 			break;
5008 		case 53:
5009 			block = "VGT";
5010 			break;
5011 		case 117:
5012 			block = "IH";
5013 			break;
5014 		case 51:
5015 		case 115:
5016 			block = "RLC";
5017 			break;
5018 		case 119:
5019 		case 183:
5020 			block = "DMA0";
5021 			break;
5022 		case 61:
5023 			block = "DMA1";
5024 			break;
5025 		case 248:
5026 		case 120:
5027 			block = "HDP";
5028 			break;
5029 		default:
5030 			block = "unknown";
5031 			break;
5032 		}
5033 	}
5034 
5035 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5036 	       protections, vmid, addr,
5037 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5038 	       block, mc_id);
5039 }
5040 
5041 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5042 		 unsigned vm_id, uint64_t pd_addr)
5043 {
5044 	/* write new base address */
5045 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5046 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5047 				 WRITE_DATA_DST_SEL(0)));
5048 
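	/* contexts 0-7 and 8-15 keep their page table base addresses in
	 * two separate register banks, hence the split below
	 */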
5049 	if (vm_id < 8) {
5050 		radeon_ring_write(ring,
5051 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5052 	} else {
5053 		radeon_ring_write(ring,
5054 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5055 	}
5056 	radeon_ring_write(ring, 0);
5057 	radeon_ring_write(ring, pd_addr >> 12);
5058 
5059 	/* flush hdp cache */
5060 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5061 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5062 				 WRITE_DATA_DST_SEL(0)));
5063 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5064 	radeon_ring_write(ring, 0);
5065 	radeon_ring_write(ring, 0x1);
5066 
5067 	/* bits 0-15 are the VM contexts0-15 */
5068 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5069 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5070 				 WRITE_DATA_DST_SEL(0)));
5071 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5072 	radeon_ring_write(ring, 0);
5073 	radeon_ring_write(ring, 1 << vm_id);
5074 
5075 	/* wait for the invalidate to complete */
5076 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5077 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5078 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5079 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5080 	radeon_ring_write(ring, 0);
5081 	radeon_ring_write(ring, 0); /* ref */
5082 	radeon_ring_write(ring, 0); /* mask */
5083 	radeon_ring_write(ring, 0x20); /* poll interval */
5084 
5085 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5086 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5087 	radeon_ring_write(ring, 0x0);
5088 }
5089 
5090 /*
5091  *  Power and clock gating
5092  */
5093 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5094 {
5095 	int i;
5096 
5097 	for (i = 0; i < rdev->usec_timeout; i++) {
5098 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5099 			break;
5100 		udelay(1);
5101 	}
5102 
5103 	for (i = 0; i < rdev->usec_timeout; i++) {
5104 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5105 			break;
5106 		udelay(1);
5107 	}
5108 }
5109 
5110 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5111 					 bool enable)
5112 {
5113 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5114 	u32 mask;
5115 	int i;
5116 
5117 	if (enable)
5118 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5119 	else
5120 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5121 	WREG32(CP_INT_CNTL_RING0, tmp);
5122 
5123 	if (!enable) {
5124 		/* read a gfx register */
5125 		tmp = RREG32(DB_DEPTH_INFO);
5126 
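		/* then wait until the RLC is idle and the GFX block reports
		 * clock and power on with light sleep off
		 */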
5127 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5128 		for (i = 0; i < rdev->usec_timeout; i++) {
5129 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5130 				break;
5131 			udelay(1);
5132 		}
5133 	}
5134 }
5135 
5136 static void si_set_uvd_dcm(struct radeon_device *rdev,
5137 			   bool sw_mode)
5138 {
5139 	u32 tmp, tmp2;
5140 
5141 	tmp = RREG32(UVD_CGC_CTRL);
5142 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5143 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5144 
5145 	if (sw_mode) {
5146 		tmp &= ~0x7ffff800;
5147 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5148 	} else {
5149 		tmp |= 0x7ffff800;
5150 		tmp2 = 0;
5151 	}
5152 
5153 	WREG32(UVD_CGC_CTRL, tmp);
5154 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5155 }
5156 
5157 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5158 {
5159 	bool hw_mode = true;
5160 
5161 	if (hw_mode) {
5162 		si_set_uvd_dcm(rdev, false);
5163 	} else {
5164 		u32 tmp = RREG32(UVD_CGC_CTRL);
5165 		tmp &= ~DCM;
5166 		WREG32(UVD_CGC_CTRL, tmp);
5167 	}
5168 }
5169 
5170 static u32 si_halt_rlc(struct radeon_device *rdev)
5171 {
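	/* disable the RLC if it is running and hand back the previous
	 * RLC_CNTL value so the caller can restore it via si_update_rlc()
	 */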
5172 	u32 data, orig;
5173 
5174 	orig = data = RREG32(RLC_CNTL);
5175 
5176 	if (data & RLC_ENABLE) {
5177 		data &= ~RLC_ENABLE;
5178 		WREG32(RLC_CNTL, data);
5179 
5180 		si_wait_for_rlc_serdes(rdev);
5181 	}
5182 
5183 	return orig;
5184 }
5185 
5186 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5187 {
5188 	u32 tmp;
5189 
5190 	tmp = RREG32(RLC_CNTL);
5191 	if (tmp != rlc)
5192 		WREG32(RLC_CNTL, rlc);
5193 }
5194 
5195 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5196 {
5197 	u32 data, orig;
5198 
5199 	orig = data = RREG32(DMA_PG);
5200 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5201 		data |= PG_CNTL_ENABLE;
5202 	else
5203 		data &= ~PG_CNTL_ENABLE;
5204 	if (orig != data)
5205 		WREG32(DMA_PG, data);
5206 }
5207 
5208 static void si_init_dma_pg(struct radeon_device *rdev)
5209 {
5210 	u32 tmp;
5211 
5212 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5213 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5214 
5215 	for (tmp = 0; tmp < 5; tmp++)
5216 		WREG32(DMA_PGFSM_WRITE, 0);
5217 }
5218 
5219 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5220 			       bool enable)
5221 {
5222 	u32 tmp;
5223 
5224 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5225 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5226 		WREG32(RLC_TTOP_D, tmp);
5227 
5228 		tmp = RREG32(RLC_PG_CNTL);
5229 		tmp |= GFX_PG_ENABLE;
5230 		WREG32(RLC_PG_CNTL, tmp);
5231 
5232 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5233 		tmp |= AUTO_PG_EN;
5234 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5235 	} else {
5236 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5237 		tmp &= ~AUTO_PG_EN;
5238 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5239 
5240 		tmp = RREG32(DB_RENDER_CONTROL);
5241 	}
5242 }
5243 
5244 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5245 {
5246 	u32 tmp;
5247 
5248 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5249 
5250 	tmp = RREG32(RLC_PG_CNTL);
5251 	tmp |= GFX_PG_SRC;
5252 	WREG32(RLC_PG_CNTL, tmp);
5253 
5254 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5255 
5256 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5257 
5258 	tmp &= ~GRBM_REG_SGIT_MASK;
5259 	tmp |= GRBM_REG_SGIT(0x700);
5260 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5261 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5262 }
5263 
5264 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5265 {
5266 	u32 mask = 0, tmp, tmp1;
5267 	int i;
5268 
5269 	si_select_se_sh(rdev, se, sh);
5270 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5271 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5272 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5273 
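	/* the CU disable bitmap lives in the upper 16 bits; merge the
	 * hardware fuse and user configs, shift the result down, and
	 * invert it against a max_cu_per_sh-wide mask to get active CUs
	 */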
5274 	tmp &= 0xffff0000;
5275 
5276 	tmp |= tmp1;
5277 	tmp >>= 16;
5278 
5279 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
5280 		mask <<= 1;
5281 		mask |= 1;
5282 	}
5283 
5284 	return (~tmp) & mask;
5285 }
5286 
5287 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5288 {
5289 	u32 i, j, k, active_cu_number = 0;
5290 	u32 mask, counter, cu_bitmap;
5291 	u32 tmp = 0;
5292 
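	/* mark at most two CUs per SH as always-on and program the total
	 * number of active CUs into RLC_MAX_PG_CU
	 */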
5293 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5294 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5295 			mask = 1;
5296 			cu_bitmap = 0;
5297 			counter  = 0;
5298 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5299 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5300 					if (counter < 2)
5301 						cu_bitmap |= mask;
5302 					counter++;
5303 				}
5304 				mask <<= 1;
5305 			}
5306 
5307 			active_cu_number += counter;
5308 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5309 		}
5310 	}
5311 
5312 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5313 
5314 	tmp = RREG32(RLC_MAX_PG_CU);
5315 	tmp &= ~MAX_PU_CU_MASK;
5316 	tmp |= MAX_PU_CU(active_cu_number);
5317 	WREG32(RLC_MAX_PG_CU, tmp);
5318 }
5319 
5320 static void si_enable_cgcg(struct radeon_device *rdev,
5321 			   bool enable)
5322 {
5323 	u32 data, orig, tmp;
5324 
5325 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5326 
5327 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5328 		si_enable_gui_idle_interrupt(rdev, true);
5329 
5330 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5331 
5332 		tmp = si_halt_rlc(rdev);
5333 
5334 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5335 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5336 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5337 
5338 		si_wait_for_rlc_serdes(rdev);
5339 
5340 		si_update_rlc(rdev, tmp);
5341 
5342 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5343 
5344 		data |= CGCG_EN | CGLS_EN;
5345 	} else {
5346 		si_enable_gui_idle_interrupt(rdev, false);
5347 
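		/* dummy reads, presumably to let the CB clock gating
		 * state settle before coarse gating is turned off
		 */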
5348 		RREG32(CB_CGTT_SCLK_CTRL);
5349 		RREG32(CB_CGTT_SCLK_CTRL);
5350 		RREG32(CB_CGTT_SCLK_CTRL);
5351 		RREG32(CB_CGTT_SCLK_CTRL);
5352 
5353 		data &= ~(CGCG_EN | CGLS_EN);
5354 	}
5355 
5356 	if (orig != data)
5357 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5358 }
5359 
5360 static void si_enable_mgcg(struct radeon_device *rdev,
5361 			   bool enable)
5362 {
5363 	u32 data, orig, tmp = 0;
5364 
5365 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5366 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5367 		data = 0x96940200;
5368 		if (orig != data)
5369 			WREG32(CGTS_SM_CTRL_REG, data);
5370 
5371 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5372 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5373 			data |= CP_MEM_LS_EN;
5374 			if (orig != data)
5375 				WREG32(CP_MEM_SLP_CNTL, data);
5376 		}
5377 
5378 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5379 		data &= 0xffffffc0;
5380 		if (orig != data)
5381 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5382 
5383 		tmp = si_halt_rlc(rdev);
5384 
5385 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5386 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5387 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5388 
5389 		si_update_rlc(rdev, tmp);
5390 	} else {
5391 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5392 		data |= 0x00000003;
5393 		if (orig != data)
5394 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5395 
5396 		data = RREG32(CP_MEM_SLP_CNTL);
5397 		if (data & CP_MEM_LS_EN) {
5398 			data &= ~CP_MEM_LS_EN;
5399 			WREG32(CP_MEM_SLP_CNTL, data);
5400 		}
5401 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5402 		data |= LS_OVERRIDE | OVERRIDE;
5403 		if (orig != data)
5404 			WREG32(CGTS_SM_CTRL_REG, data);
5405 
5406 		tmp = si_halt_rlc(rdev);
5407 
5408 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5409 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5410 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5411 
5412 		si_update_rlc(rdev, tmp);
5413 	}
5414 }
5415 
5416 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5417 			       bool enable)
5418 {
5419 	u32 orig, data, tmp;
5420 
5421 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5422 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5423 		tmp |= 0x3fff;
5424 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5425 
5426 		orig = data = RREG32(UVD_CGC_CTRL);
5427 		data |= DCM;
5428 		if (orig != data)
5429 			WREG32(UVD_CGC_CTRL, data);
5430 
5431 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5432 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5433 	} else {
5434 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5435 		tmp &= ~0x3fff;
5436 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5437 
5438 		orig = data = RREG32(UVD_CGC_CTRL);
5439 		data &= ~DCM;
5440 		if (orig != data)
5441 			WREG32(UVD_CGC_CTRL, data);
5442 
5443 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5444 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5445 	}
5446 }
5447 
5448 static const u32 mc_cg_registers[] =
5449 {
5450 	MC_HUB_MISC_HUB_CG,
5451 	MC_HUB_MISC_SIP_CG,
5452 	MC_HUB_MISC_VM_CG,
5453 	MC_XPB_CLK_GAT,
5454 	ATC_MISC_CG,
5455 	MC_CITF_MISC_WR_CG,
5456 	MC_CITF_MISC_RD_CG,
5457 	MC_CITF_MISC_VM_CG,
5458 	VM_L2_CG,
5459 };
5460 
5461 static void si_enable_mc_ls(struct radeon_device *rdev,
5462 			    bool enable)
5463 {
5464 	int i;
5465 	u32 orig, data;
5466 
5467 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5468 		orig = data = RREG32(mc_cg_registers[i]);
5469 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5470 			data |= MC_LS_ENABLE;
5471 		else
5472 			data &= ~MC_LS_ENABLE;
5473 		if (data != orig)
5474 			WREG32(mc_cg_registers[i], data);
5475 	}
5476 }
5477 
5478 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5479 			       bool enable)
5480 {
5481 	int i;
5482 	u32 orig, data;
5483 
5484 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5485 		orig = data = RREG32(mc_cg_registers[i]);
5486 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5487 			data |= MC_CG_ENABLE;
5488 		else
5489 			data &= ~MC_CG_ENABLE;
5490 		if (data != orig)
5491 			WREG32(mc_cg_registers[i], data);
5492 	}
5493 }
5494 
5495 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5496 			       bool enable)
5497 {
5498 	u32 orig, data, offset;
5499 	int i;
5500 
5501 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5502 		for (i = 0; i < 2; i++) {
5503 			if (i == 0)
5504 				offset = DMA0_REGISTER_OFFSET;
5505 			else
5506 				offset = DMA1_REGISTER_OFFSET;
5507 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5508 			data &= ~MEM_POWER_OVERRIDE;
5509 			if (data != orig)
5510 				WREG32(DMA_POWER_CNTL + offset, data);
5511 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5512 		}
5513 	} else {
5514 		for (i = 0; i < 2; i++) {
5515 			if (i == 0)
5516 				offset = DMA0_REGISTER_OFFSET;
5517 			else
5518 				offset = DMA1_REGISTER_OFFSET;
5519 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5520 			data |= MEM_POWER_OVERRIDE;
5521 			if (data != orig)
5522 				WREG32(DMA_POWER_CNTL + offset, data);
5523 
5524 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5525 			data = 0xff000000;
5526 			if (data != orig)
5527 				WREG32(DMA_CLK_CTRL + offset, data);
5528 		}
5529 	}
5530 }
5531 
5532 static void si_enable_bif_mgls(struct radeon_device *rdev,
5533 			       bool enable)
5534 {
5535 	u32 orig, data;
5536 
5537 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5538 
5539 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5540 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5541 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5542 	else
5543 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5544 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5545 
5546 	if (orig != data)
5547 		WREG32_PCIE(PCIE_CNTL2, data);
5548 }
5549 
5550 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5551 			       bool enable)
5552 {
5553 	u32 orig, data;
5554 
5555 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5556 
5557 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5558 		data &= ~CLOCK_GATING_DIS;
5559 	else
5560 		data |= CLOCK_GATING_DIS;
5561 
5562 	if (orig != data)
5563 		WREG32(HDP_HOST_PATH_CNTL, data);
5564 }
5565 
5566 static void si_enable_hdp_ls(struct radeon_device *rdev,
5567 			     bool enable)
5568 {
5569 	u32 orig, data;
5570 
5571 	orig = data = RREG32(HDP_MEM_POWER_LS);
5572 
5573 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5574 		data |= HDP_LS_ENABLE;
5575 	else
5576 		data &= ~HDP_LS_ENABLE;
5577 
5578 	if (orig != data)
5579 		WREG32(HDP_MEM_POWER_LS, data);
5580 }
5581 
5582 static void si_update_cg(struct radeon_device *rdev,
5583 			 u32 block, bool enable)
5584 {
5585 	if (block & RADEON_CG_BLOCK_GFX) {
5586 		si_enable_gui_idle_interrupt(rdev, false);
5587 		/* order matters! */
5588 		if (enable) {
5589 			si_enable_mgcg(rdev, true);
5590 			si_enable_cgcg(rdev, true);
5591 		} else {
5592 			si_enable_cgcg(rdev, false);
5593 			si_enable_mgcg(rdev, false);
5594 		}
5595 		si_enable_gui_idle_interrupt(rdev, true);
5596 	}
5597 
5598 	if (block & RADEON_CG_BLOCK_MC) {
5599 		si_enable_mc_mgcg(rdev, enable);
5600 		si_enable_mc_ls(rdev, enable);
5601 	}
5602 
5603 	if (block & RADEON_CG_BLOCK_SDMA) {
5604 		si_enable_dma_mgcg(rdev, enable);
5605 	}
5606 
5607 	if (block & RADEON_CG_BLOCK_BIF) {
5608 		si_enable_bif_mgls(rdev, enable);
5609 	}
5610 
5611 	if (block & RADEON_CG_BLOCK_UVD) {
5612 		if (rdev->has_uvd) {
5613 			si_enable_uvd_mgcg(rdev, enable);
5614 		}
5615 	}
5616 
5617 	if (block & RADEON_CG_BLOCK_HDP) {
5618 		si_enable_hdp_mgcg(rdev, enable);
5619 		si_enable_hdp_ls(rdev, enable);
5620 	}
5621 }
5622 
5623 static void si_init_cg(struct radeon_device *rdev)
5624 {
5625 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5626 			    RADEON_CG_BLOCK_MC |
5627 			    RADEON_CG_BLOCK_SDMA |
5628 			    RADEON_CG_BLOCK_BIF |
5629 			    RADEON_CG_BLOCK_HDP), true);
5630 	if (rdev->has_uvd) {
5631 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5632 		si_init_uvd_internal_cg(rdev);
5633 	}
5634 }
5635 
5636 static void si_fini_cg(struct radeon_device *rdev)
5637 {
5638 	if (rdev->has_uvd) {
5639 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5640 	}
5641 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5642 			    RADEON_CG_BLOCK_MC |
5643 			    RADEON_CG_BLOCK_SDMA |
5644 			    RADEON_CG_BLOCK_BIF |
5645 			    RADEON_CG_BLOCK_HDP), false);
5646 }
5647 
5648 u32 si_get_csb_size(struct radeon_device *rdev)
5649 {
5650 	u32 count = 0;
5651 	const struct cs_section_def *sect = NULL;
5652 	const struct cs_extent_def *ext = NULL;
5653 
5654 	if (rdev->rlc.cs_data == NULL)
5655 		return 0;
5656 
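	/* all sizes below are in dwords: one PACKET3 header plus payload */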
5657 	/* begin clear state */
5658 	count += 2;
5659 	/* context control state */
5660 	count += 3;
5661 
5662 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5663 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5664 			if (sect->id == SECT_CONTEXT)
5665 				count += 2 + ext->reg_count;
5666 			else
5667 				return 0;
5668 		}
5669 	}
5670 	/* pa_sc_raster_config */
5671 	count += 3;
5672 	/* end clear state */
5673 	count += 2;
5674 	/* clear state */
5675 	count += 2;
5676 
5677 	return count;
5678 }
5679 
5680 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5681 {
5682 	u32 count = 0, i;
5683 	const struct cs_section_def *sect = NULL;
5684 	const struct cs_extent_def *ext = NULL;
5685 
5686 	if (rdev->rlc.cs_data == NULL)
5687 		return;
5688 	if (buffer == NULL)
5689 		return;
5690 
5691 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5692 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5693 
5694 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5695 	buffer[count++] = cpu_to_le32(0x80000000);
5696 	buffer[count++] = cpu_to_le32(0x80000000);
5697 
5698 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5699 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5700 			if (sect->id == SECT_CONTEXT) {
5701 				buffer[count++] =
5702 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5703 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5704 				for (i = 0; i < ext->reg_count; i++)
5705 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5706 			} else {
5707 				return;
5708 			}
5709 		}
5710 	}
5711 
5712 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5713 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5714 	switch (rdev->family) {
5715 	case CHIP_TAHITI:
5716 	case CHIP_PITCAIRN:
5717 		buffer[count++] = cpu_to_le32(0x2a00126a);
5718 		break;
5719 	case CHIP_VERDE:
5720 		buffer[count++] = cpu_to_le32(0x0000124a);
5721 		break;
5722 	case CHIP_OLAND:
5723 		buffer[count++] = cpu_to_le32(0x00000082);
5724 		break;
5725 	case CHIP_HAINAN:
5726 		buffer[count++] = cpu_to_le32(0x00000000);
5727 		break;
5728 	default:
5729 		buffer[count++] = cpu_to_le32(0x00000000);
5730 		break;
5731 	}
5732 
5733 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5734 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5735 
5736 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5737 	buffer[count++] = cpu_to_le32(0);
5738 }
5739 
5740 static void si_init_pg(struct radeon_device *rdev)
5741 {
5742 	if (rdev->pg_flags) {
5743 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5744 			si_init_dma_pg(rdev);
5745 		}
5746 		si_init_ao_cu_mask(rdev);
5747 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5748 			si_init_gfx_cgpg(rdev);
5749 		} else {
5750 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5751 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5752 		}
5753 		si_enable_dma_pg(rdev, true);
5754 		si_enable_gfx_cgpg(rdev, true);
5755 	} else {
5756 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5757 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5758 	}
5759 }
5760 
5761 static void si_fini_pg(struct radeon_device *rdev)
5762 {
5763 	if (rdev->pg_flags) {
5764 		si_enable_dma_pg(rdev, false);
5765 		si_enable_gfx_cgpg(rdev, false);
5766 	}
5767 }
5768 
5769 /*
5770  * RLC
5771  */
5772 void si_rlc_reset(struct radeon_device *rdev)
5773 {
5774 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5775 
5776 	tmp |= SOFT_RESET_RLC;
5777 	WREG32(GRBM_SOFT_RESET, tmp);
5778 	udelay(50);
5779 	tmp &= ~SOFT_RESET_RLC;
5780 	WREG32(GRBM_SOFT_RESET, tmp);
5781 	udelay(50);
5782 }
5783 
5784 static void si_rlc_stop(struct radeon_device *rdev)
5785 {
5786 	WREG32(RLC_CNTL, 0);
5787 
5788 	si_enable_gui_idle_interrupt(rdev, false);
5789 
5790 	si_wait_for_rlc_serdes(rdev);
5791 }
5792 
5793 static void si_rlc_start(struct radeon_device *rdev)
5794 {
5795 	WREG32(RLC_CNTL, RLC_ENABLE);
5796 
5797 	si_enable_gui_idle_interrupt(rdev, true);
5798 
5799 	udelay(50);
5800 }
5801 
5802 static bool si_lbpw_supported(struct radeon_device *rdev)
5803 {
5804 	u32 tmp;
5805 
5806 	/* Enable LBPW only for DDR3 */
5807 	tmp = RREG32(MC_SEQ_MISC0);
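	/* bits 31:28 of MC_SEQ_MISC0 encode the memory type */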
5808 	if ((tmp & 0xF0000000) == 0xB0000000)
5809 		return true;
5810 	return false;
5811 }
5812 
5813 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5814 {
5815 	u32 tmp;
5816 
5817 	tmp = RREG32(RLC_LB_CNTL);
5818 	if (enable)
5819 		tmp |= LOAD_BALANCE_ENABLE;
5820 	else
5821 		tmp &= ~LOAD_BALANCE_ENABLE;
5822 	WREG32(RLC_LB_CNTL, tmp);
5823 
5824 	if (!enable) {
5825 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5826 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5827 	}
5828 }
5829 
5830 static int si_rlc_resume(struct radeon_device *rdev)
5831 {
5832 	u32 i;
5833 
5834 	if (!rdev->rlc_fw)
5835 		return -EINVAL;
5836 
5837 	si_rlc_stop(rdev);
5838 
5839 	si_rlc_reset(rdev);
5840 
5841 	si_init_pg(rdev);
5842 
5843 	si_init_cg(rdev);
5844 
5845 	WREG32(RLC_RL_BASE, 0);
5846 	WREG32(RLC_RL_SIZE, 0);
5847 	WREG32(RLC_LB_CNTL, 0);
5848 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5849 	WREG32(RLC_LB_CNTR_INIT, 0);
5850 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5851 
5852 	WREG32(RLC_MC_CNTL, 0);
5853 	WREG32(RLC_UCODE_CNTL, 0);
5854 
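	/* stream the microcode one dword at a time through the
	 * UCODE_ADDR/UCODE_DATA pair; new-style firmware carries a header
	 * and is little-endian, legacy blobs are big-endian
	 */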
5855 	if (rdev->new_fw) {
5856 		const struct rlc_firmware_header_v1_0 *hdr =
5857 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5858 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5859 		const __le32 *fw_data = (const __le32 *)
5860 			((const char *)rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5861 
5862 		radeon_ucode_print_rlc_hdr(&hdr->header);
5863 
5864 		for (i = 0; i < fw_size; i++) {
5865 			WREG32(RLC_UCODE_ADDR, i);
5866 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5867 		}
5868 	} else {
5869 		const __be32 *fw_data =
5870 			(const __be32 *)rdev->rlc_fw->data;
5871 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5872 			WREG32(RLC_UCODE_ADDR, i);
5873 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5874 		}
5875 	}
5876 	WREG32(RLC_UCODE_ADDR, 0);
5877 
5878 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5879 
5880 	si_rlc_start(rdev);
5881 
5882 	return 0;
5883 }
5884 
5885 static void si_enable_interrupts(struct radeon_device *rdev)
5886 {
5887 	u32 ih_cntl = RREG32(IH_CNTL);
5888 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5889 
5890 	ih_cntl |= ENABLE_INTR;
5891 	ih_rb_cntl |= IH_RB_ENABLE;
5892 	WREG32(IH_CNTL, ih_cntl);
5893 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5894 	rdev->ih.enabled = true;
5895 }
5896 
5897 static void si_disable_interrupts(struct radeon_device *rdev)
5898 {
5899 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5900 	u32 ih_cntl = RREG32(IH_CNTL);
5901 
5902 	ih_rb_cntl &= ~IH_RB_ENABLE;
5903 	ih_cntl &= ~ENABLE_INTR;
5904 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5905 	WREG32(IH_CNTL, ih_cntl);
5906 	/* set rptr, wptr to 0 */
5907 	WREG32(IH_RB_RPTR, 0);
5908 	WREG32(IH_RB_WPTR, 0);
5909 	rdev->ih.enabled = false;
5910 	rdev->ih.rptr = 0;
5911 }
5912 
5913 static void si_disable_interrupt_state(struct radeon_device *rdev)
5914 {
5915 	u32 tmp;
5916 
5917 	tmp = RREG32(CP_INT_CNTL_RING0) &
5918 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5919 	WREG32(CP_INT_CNTL_RING0, tmp);
5920 	WREG32(CP_INT_CNTL_RING1, 0);
5921 	WREG32(CP_INT_CNTL_RING2, 0);
5922 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5923 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5924 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5925 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5926 	WREG32(GRBM_INT_CNTL, 0);
5927 	WREG32(SRBM_INT_CNTL, 0);
5928 	if (rdev->num_crtc >= 2) {
5929 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5930 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5931 	}
5932 	if (rdev->num_crtc >= 4) {
5933 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5934 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5935 	}
5936 	if (rdev->num_crtc >= 6) {
5937 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5938 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5939 	}
5940 
5941 	if (rdev->num_crtc >= 2) {
5942 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5943 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5944 	}
5945 	if (rdev->num_crtc >= 4) {
5946 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5947 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5948 	}
5949 	if (rdev->num_crtc >= 6) {
5950 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5951 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5952 	}
5953 
5954 	if (!ASIC_IS_NODCE(rdev)) {
5955 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5956 
5957 		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5958 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5959 		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5960 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5961 		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5962 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5963 		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5964 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5965 		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5966 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5967 		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5968 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5969 	}
5970 }
5971 
5972 static int si_irq_init(struct radeon_device *rdev)
5973 {
5974 	int ret = 0;
5975 	int rb_bufsz;
5976 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5977 
5978 	/* allocate ring */
5979 	ret = r600_ih_ring_alloc(rdev);
5980 	if (ret)
5981 		return ret;
5982 
5983 	/* disable irqs */
5984 	si_disable_interrupts(rdev);
5985 
5986 	/* init rlc */
5987 	ret = si_rlc_resume(rdev);
5988 	if (ret) {
5989 		r600_ih_ring_fini(rdev);
5990 		return ret;
5991 	}
5992 
5993 	/* setup interrupt control */
5994 	/* set dummy read address to ring address */
5995 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5996 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
5997 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5998 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5999 	 */
6000 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6001 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6002 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6003 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6004 
6005 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6006 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
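	/* the ring size is given in bytes; the RB size field wants the
	 * log2 of the size in dwords
	 */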
6007 
6008 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6009 		      IH_WPTR_OVERFLOW_CLEAR |
6010 		      (rb_bufsz << 1));
6011 
6012 	if (rdev->wb.enabled)
6013 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6014 
6015 	/* set the writeback address whether it's enabled or not */
6016 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6017 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6018 
6019 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6020 
6021 	/* set rptr, wptr to 0 */
6022 	WREG32(IH_RB_RPTR, 0);
6023 	WREG32(IH_RB_WPTR, 0);
6024 
6025 	/* Default settings for IH_CNTL (disabled at first) */
6026 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6027 	/* RPTR_REARM only works if msi's are enabled */
6028 	if (rdev->msi_enabled)
6029 		ih_cntl |= RPTR_REARM;
6030 	WREG32(IH_CNTL, ih_cntl);
6031 
6032 	/* force the active interrupt state to all disabled */
6033 	si_disable_interrupt_state(rdev);
6034 
6035 	pci_enable_busmaster(rdev->dev->bsddev);
6036 
6037 	/* enable irqs */
6038 	si_enable_interrupts(rdev);
6039 
6040 	return ret;
6041 }
6042 
6043 int si_irq_set(struct radeon_device *rdev)
6044 {
6045 	u32 cp_int_cntl;
6046 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6047 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6048 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
6049 	u32 grbm_int_cntl = 0;
6050 	u32 dma_cntl, dma_cntl1;
6051 	u32 thermal_int = 0;
6052 
6053 	if (!rdev->irq.installed) {
6054 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6055 		return -EINVAL;
6056 	}
6057 	/* don't enable anything if the ih is disabled */
6058 	if (!rdev->ih.enabled) {
6059 		si_disable_interrupts(rdev);
6060 		/* force the active interrupt state to all disabled */
6061 		si_disable_interrupt_state(rdev);
6062 		return 0;
6063 	}
6064 
6065 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6066 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6067 
6068 	if (!ASIC_IS_NODCE(rdev)) {
6069 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6070 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6071 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6072 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6073 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6074 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6075 	}
6076 
6077 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6078 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6079 
6080 	thermal_int = RREG32(CG_THERMAL_INT) &
6081 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6082 
6083 	/* enable CP interrupts on all rings */
6084 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6085 		DRM_DEBUG("si_irq_set: sw int gfx\n");
6086 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6087 	}
6088 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6089 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6090 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6091 	}
6092 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6093 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6094 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6095 	}
6096 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6097 		DRM_DEBUG("si_irq_set: sw int dma\n");
6098 		dma_cntl |= TRAP_ENABLE;
6099 	}
6100 
6101 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6102 		DRM_DEBUG("si_irq_set: sw int dma1\n");
6103 		dma_cntl1 |= TRAP_ENABLE;
6104 	}
6105 	if (rdev->irq.crtc_vblank_int[0] ||
6106 	    atomic_read(&rdev->irq.pflip[0])) {
6107 		DRM_DEBUG("si_irq_set: vblank 0\n");
6108 		crtc1 |= VBLANK_INT_MASK;
6109 	}
6110 	if (rdev->irq.crtc_vblank_int[1] ||
6111 	    atomic_read(&rdev->irq.pflip[1])) {
6112 		DRM_DEBUG("si_irq_set: vblank 1\n");
6113 		crtc2 |= VBLANK_INT_MASK;
6114 	}
6115 	if (rdev->irq.crtc_vblank_int[2] ||
6116 	    atomic_read(&rdev->irq.pflip[2])) {
6117 		DRM_DEBUG("si_irq_set: vblank 2\n");
6118 		crtc3 |= VBLANK_INT_MASK;
6119 	}
6120 	if (rdev->irq.crtc_vblank_int[3] ||
6121 	    atomic_read(&rdev->irq.pflip[3])) {
6122 		DRM_DEBUG("si_irq_set: vblank 3\n");
6123 		crtc4 |= VBLANK_INT_MASK;
6124 	}
6125 	if (rdev->irq.crtc_vblank_int[4] ||
6126 	    atomic_read(&rdev->irq.pflip[4])) {
6127 		DRM_DEBUG("si_irq_set: vblank 4\n");
6128 		crtc5 |= VBLANK_INT_MASK;
6129 	}
6130 	if (rdev->irq.crtc_vblank_int[5] ||
6131 	    atomic_read(&rdev->irq.pflip[5])) {
6132 		DRM_DEBUG("si_irq_set: vblank 5\n");
6133 		crtc6 |= VBLANK_INT_MASK;
6134 	}
6135 	if (rdev->irq.hpd[0]) {
6136 		DRM_DEBUG("si_irq_set: hpd 1\n");
6137 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6138 	}
6139 	if (rdev->irq.hpd[1]) {
6140 		DRM_DEBUG("si_irq_set: hpd 2\n");
6141 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6142 	}
6143 	if (rdev->irq.hpd[2]) {
6144 		DRM_DEBUG("si_irq_set: hpd 3\n");
6145 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6146 	}
6147 	if (rdev->irq.hpd[3]) {
6148 		DRM_DEBUG("si_irq_set: hpd 4\n");
6149 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6150 	}
6151 	if (rdev->irq.hpd[4]) {
6152 		DRM_DEBUG("si_irq_set: hpd 5\n");
6153 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6154 	}
6155 	if (rdev->irq.hpd[5]) {
6156 		DRM_DEBUG("si_irq_set: hpd 6\n");
6157 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6158 	}
6159 
6160 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6161 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6162 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6163 
6164 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6165 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6166 
6167 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6168 
6169 	if (rdev->irq.dpm_thermal) {
6170 		DRM_DEBUG("dpm thermal\n");
6171 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6172 	}
6173 
6174 	if (rdev->num_crtc >= 2) {
6175 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6176 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6177 	}
6178 	if (rdev->num_crtc >= 4) {
6179 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6180 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6181 	}
6182 	if (rdev->num_crtc >= 6) {
6183 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6184 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6185 	}
6186 
6187 	if (rdev->num_crtc >= 2) {
6188 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6189 		       GRPH_PFLIP_INT_MASK);
6190 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6191 		       GRPH_PFLIP_INT_MASK);
6192 	}
6193 	if (rdev->num_crtc >= 4) {
6194 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6195 		       GRPH_PFLIP_INT_MASK);
6196 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6197 		       GRPH_PFLIP_INT_MASK);
6198 	}
6199 	if (rdev->num_crtc >= 6) {
6200 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6201 		       GRPH_PFLIP_INT_MASK);
6202 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6203 		       GRPH_PFLIP_INT_MASK);
6204 	}
6205 
6206 	if (!ASIC_IS_NODCE(rdev)) {
6207 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
6208 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
6209 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
6210 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
6211 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
6212 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
6213 	}
6214 
6215 	WREG32(CG_THERMAL_INT, thermal_int);
6216 
6217 	/* posting read */
6218 	RREG32(SRBM_STATUS);
6219 
6220 	return 0;
6221 }
6222 
6223 static inline void si_irq_ack(struct radeon_device *rdev)
6224 {
6225 	u32 tmp;
6226 
6227 	if (ASIC_IS_NODCE(rdev))
6228 		return;
6229 
6230 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6231 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6232 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6233 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6234 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6235 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6236 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6237 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6238 	if (rdev->num_crtc >= 4) {
6239 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6240 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6241 	}
6242 	if (rdev->num_crtc >= 6) {
6243 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6244 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6245 	}
6246 
6247 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6248 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6249 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6250 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6251 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6252 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6253 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6254 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6255 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6256 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6257 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6258 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6259 
6260 	if (rdev->num_crtc >= 4) {
6261 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6262 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6263 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6264 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6265 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6266 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6267 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6268 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6269 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6270 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6271 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6272 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6273 	}
6274 
6275 	if (rdev->num_crtc >= 6) {
6276 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6277 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6278 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6279 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6280 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6281 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6282 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6283 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6284 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6285 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6286 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6287 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6288 	}
6289 
6290 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6291 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6292 		tmp |= DC_HPDx_INT_ACK;
6293 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6294 	}
6295 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6296 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6297 		tmp |= DC_HPDx_INT_ACK;
6298 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6299 	}
6300 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6301 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6302 		tmp |= DC_HPDx_INT_ACK;
6303 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6304 	}
6305 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6306 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6307 		tmp |= DC_HPDx_INT_ACK;
6308 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6309 	}
6310 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6311 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6312 		tmp |= DC_HPDx_INT_ACK;
6313 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6314 	}
6315 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6316 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6317 		tmp |= DC_HPDx_INT_ACK;
6318 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6319 	}
6320 
6321 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
6322 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6323 		tmp |= DC_HPDx_RX_INT_ACK;
6324 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6325 	}
6326 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
6327 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6328 		tmp |= DC_HPDx_RX_INT_ACK;
6329 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6330 	}
6331 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
6332 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6333 		tmp |= DC_HPDx_RX_INT_ACK;
6334 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6335 	}
6336 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
6337 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6338 		tmp |= DC_HPDx_RX_INT_ACK;
6339 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6340 	}
6341 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
6342 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6343 		tmp |= DC_HPDx_RX_INT_ACK;
6344 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6345 	}
6346 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
6347 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6348 		tmp |= DC_HPDx_RX_INT_ACK;
6349 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6350 	}
6351 }
6352 
6353 static void si_irq_disable(struct radeon_device *rdev)
6354 {
6355 	si_disable_interrupts(rdev);
6356 	/* Wait and acknowledge irq */
6357 	mdelay(1);
6358 	si_irq_ack(rdev);
6359 	si_disable_interrupt_state(rdev);
6360 }
6361 
6362 static void si_irq_suspend(struct radeon_device *rdev)
6363 {
6364 	si_irq_disable(rdev);
6365 	si_rlc_stop(rdev);
6366 }
6367 
6368 static void si_irq_fini(struct radeon_device *rdev)
6369 {
6370 	si_irq_suspend(rdev);
6371 	r600_ih_ring_fini(rdev);
6372 }
6373 
6374 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6375 {
6376 	u32 wptr, tmp;
6377 
6378 	if (rdev->wb.enabled)
6379 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6380 	else
6381 		wptr = RREG32(IH_RB_WPTR);
6382 
6383 	if (wptr & RB_OVERFLOW) {
6384 		wptr &= ~RB_OVERFLOW;
6385 		/* When a ring buffer overflow happens, start parsing interrupts
6386 		 * from the last vector that was not overwritten (wptr + 16).
6387 		 * Hopefully this should allow us to catch up.
6388 		 */
6389 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6390 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6391 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6392 		tmp = RREG32(IH_RB_CNTL);
6393 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6394 		WREG32(IH_RB_CNTL, tmp);
6395 	}
6396 	return (wptr & rdev->ih.ptr_mask);
6397 }
6398 
6399 /* SI IV Ring
6400  * Each IV ring entry is 128 bits:
6401  * [7:0]    - interrupt source id
6402  * [31:8]   - reserved
6403  * [59:32]  - interrupt source data
6404  * [63:60]  - reserved
6405  * [71:64]  - RINGID
6406  * [79:72]  - VMID
6407  * [127:80] - reserved
6408  */
6409 irqreturn_t si_irq_process(struct radeon_device *rdev)
6410 {
6411 	u32 wptr;
6412 	u32 rptr;
6413 	u32 src_id, src_data, ring_id;
6414 	u32 ring_index;
6415 	bool queue_hotplug = false;
6416 	bool queue_dp = false;
6417 	bool queue_thermal = false;
6418 	u32 status, addr;
6419 
6420 	if (!rdev->ih.enabled || rdev->shutdown)
6421 		return IRQ_NONE;
6422 
6423 	wptr = si_get_ih_wptr(rdev);
6424 
6425 restart_ih:
6426 	/* is somebody else already processing irqs? */
6427 	if (atomic_xchg(&rdev->ih.lock, 1))
6428 		return IRQ_NONE;
6429 
6430 	rptr = rdev->ih.rptr;
6431 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6432 
6433 	/* Order reading of wptr vs. reading of IH ring data */
6434 	rmb();
6435 
6436 	/* display interrupts */
6437 	si_irq_ack(rdev);
6438 
6439 	while (rptr != wptr) {
6440 		/* wptr/rptr are in bytes! */
6441 		ring_index = rptr / 4;
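		/* each 128-bit IV entry spans four dwords; the fields of
		 * interest live in the first three (see the layout above)
		 */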
6442 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6443 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6444 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6445 
6446 		switch (src_id) {
6447 		case 1: /* D1 vblank/vline */
6448 			switch (src_data) {
6449 			case 0: /* D1 vblank */
6450 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
6451 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6452 
6453 				if (rdev->irq.crtc_vblank_int[0]) {
6454 					drm_handle_vblank(rdev->ddev, 0);
6455 					rdev->pm.vblank_sync = true;
6456 					wake_up(&rdev->irq.vblank_queue);
6457 				}
6458 				if (atomic_read(&rdev->irq.pflip[0]))
6459 					radeon_crtc_handle_vblank(rdev, 0);
6460 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6461 				DRM_DEBUG("IH: D1 vblank\n");
6462 
6463 				break;
6464 			case 1: /* D1 vline */
6465 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
6466 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6467 
6468 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6469 				DRM_DEBUG("IH: D1 vline\n");
6470 
6471 				break;
6472 			default:
6473 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6474 				break;
6475 			}
6476 			break;
6477 		case 2: /* D2 vblank/vline */
6478 			switch (src_data) {
6479 			case 0: /* D2 vblank */
6480 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
6481 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6482 
6483 				if (rdev->irq.crtc_vblank_int[1]) {
6484 					drm_handle_vblank(rdev->ddev, 1);
6485 					rdev->pm.vblank_sync = true;
6486 					wake_up(&rdev->irq.vblank_queue);
6487 				}
6488 				if (atomic_read(&rdev->irq.pflip[1]))
6489 					radeon_crtc_handle_vblank(rdev, 1);
6490 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6491 				DRM_DEBUG("IH: D2 vblank\n");
6492 
6493 				break;
6494 			case 1: /* D2 vline */
6495 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
6496 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6497 
6498 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6499 				DRM_DEBUG("IH: D2 vline\n");
6500 
6501 				break;
6502 			default:
6503 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6504 				break;
6505 			}
6506 			break;
6507 		case 3: /* D3 vblank/vline */
6508 			switch (src_data) {
6509 			case 0: /* D3 vblank */
6510 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
6511 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6512 
6513 				if (rdev->irq.crtc_vblank_int[2]) {
6514 					drm_handle_vblank(rdev->ddev, 2);
6515 					rdev->pm.vblank_sync = true;
6516 					wake_up(&rdev->irq.vblank_queue);
6517 				}
6518 				if (atomic_read(&rdev->irq.pflip[2]))
6519 					radeon_crtc_handle_vblank(rdev, 2);
6520 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6521 				DRM_DEBUG("IH: D3 vblank\n");
6522 
6523 				break;
6524 			case 1: /* D3 vline */
6525 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
6526 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6527 
6528 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6529 				DRM_DEBUG("IH: D3 vline\n");
6530 
6531 				break;
6532 			default:
6533 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6534 				break;
6535 			}
6536 			break;
6537 		case 4: /* D4 vblank/vline */
6538 			switch (src_data) {
6539 			case 0: /* D4 vblank */
6540 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
6541 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6542 
6543 				if (rdev->irq.crtc_vblank_int[3]) {
6544 					drm_handle_vblank(rdev->ddev, 3);
6545 					rdev->pm.vblank_sync = true;
6546 					wake_up(&rdev->irq.vblank_queue);
6547 				}
6548 				if (atomic_read(&rdev->irq.pflip[3]))
6549 					radeon_crtc_handle_vblank(rdev, 3);
6550 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6551 				DRM_DEBUG("IH: D4 vblank\n");
6552 
6553 				break;
6554 			case 1: /* D4 vline */
6555 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
6556 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6557 
6558 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6559 				DRM_DEBUG("IH: D4 vline\n");
6560 
6561 				break;
6562 			default:
6563 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6564 				break;
6565 			}
6566 			break;
6567 		case 5: /* D5 vblank/vline */
6568 			switch (src_data) {
6569 			case 0: /* D5 vblank */
6570 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
6571 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6572 
6573 				if (rdev->irq.crtc_vblank_int[4]) {
6574 					drm_handle_vblank(rdev->ddev, 4);
6575 					rdev->pm.vblank_sync = true;
6576 					wake_up(&rdev->irq.vblank_queue);
6577 				}
6578 				if (atomic_read(&rdev->irq.pflip[4]))
6579 					radeon_crtc_handle_vblank(rdev, 4);
6580 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6581 				DRM_DEBUG("IH: D5 vblank\n");
6582 
6583 				break;
6584 			case 1: /* D5 vline */
6585 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
6586 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6587 
6588 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6589 				DRM_DEBUG("IH: D5 vline\n");
6590 
6591 				break;
6592 			default:
6593 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6594 				break;
6595 			}
6596 			break;
6597 		case 6: /* D6 vblank/vline */
6598 			switch (src_data) {
6599 			case 0: /* D6 vblank */
6600 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
6601 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6602 
6603 				if (rdev->irq.crtc_vblank_int[5]) {
6604 					drm_handle_vblank(rdev->ddev, 5);
6605 					rdev->pm.vblank_sync = true;
6606 					wake_up(&rdev->irq.vblank_queue);
6607 				}
6608 				if (atomic_read(&rdev->irq.pflip[5]))
6609 					radeon_crtc_handle_vblank(rdev, 5);
6610 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6611 				DRM_DEBUG("IH: D6 vblank\n");
6612 
6613 				break;
6614 			case 1: /* D6 vline */
6615 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
6616 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6617 
6618 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6619 				DRM_DEBUG("IH: D6 vline\n");
6620 
6621 				break;
6622 			default:
6623 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6624 				break;
6625 			}
6626 			break;
6627 		case 8: /* D1 page flip */
6628 		case 10: /* D2 page flip */
6629 		case 12: /* D3 page flip */
6630 		case 14: /* D4 page flip */
6631 		case 16: /* D5 page flip */
6632 		case 18: /* D6 page flip */
6633 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6634 			if (radeon_use_pflipirq > 0)
6635 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6636 			break;
6637 		case 42: /* HPD hotplug */
6638 			switch (src_data) {
6639 			case 0:
6640 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
6641 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6642 
6643 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6644 				queue_hotplug = true;
6645 				DRM_DEBUG("IH: HPD1\n");
6646 
6647 				break;
6648 			case 1:
6649 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
6650 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6651 
6652 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6653 				queue_hotplug = true;
6654 				DRM_DEBUG("IH: HPD2\n");
6655 
6656 				break;
6657 			case 2:
6658 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
6659 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6660 
6661 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6662 				queue_hotplug = true;
6663 				DRM_DEBUG("IH: HPD3\n");
6664 
6665 				break;
6666 			case 3:
6667 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
6668 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6669 
6670 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6671 				queue_hotplug = true;
6672 				DRM_DEBUG("IH: HPD4\n");
6673 
6674 				break;
6675 			case 4:
6676 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
6677 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6678 
6679 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6680 				queue_hotplug = true;
6681 				DRM_DEBUG("IH: HPD5\n");
6682 
6683 				break;
6684 			case 5:
6685 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
6686 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6687 
6688 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6689 				queue_hotplug = true;
6690 				DRM_DEBUG("IH: HPD6\n");
6691 
6692 				break;
6693 			case 6:
6694 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
6695 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6696 
6697 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
6698 				queue_dp = true;
6699 				DRM_DEBUG("IH: HPD_RX 1\n");
6700 
6701 				break;
6702 			case 7:
6703 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
6704 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6705 
6706 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
6707 				queue_dp = true;
6708 				DRM_DEBUG("IH: HPD_RX 2\n");
6709 
6710 				break;
6711 			case 8:
6712 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
6713 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6714 
6715 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
6716 				queue_dp = true;
6717 				DRM_DEBUG("IH: HPD_RX 3\n");
6718 
6719 				break;
6720 			case 9:
6721 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
6722 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6723 
6724 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
6725 				queue_dp = true;
6726 				DRM_DEBUG("IH: HPD_RX 4\n");
6727 
6728 				break;
6729 			case 10:
6730 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
6731 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6732 
6733 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
6734 				queue_dp = true;
6735 				DRM_DEBUG("IH: HPD_RX 5\n");
6736 
6737 				break;
6738 			case 11:
6739 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
6740 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6741 
6742 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
6743 				queue_dp = true;
6744 				DRM_DEBUG("IH: HPD_RX 6\n");
6745 
6746 				break;
6747 			default:
6748 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6749 				break;
6750 			}
6751 			break;
6752 		case 96:
6753 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6754 			WREG32(SRBM_INT_ACK, 0x1);
6755 			break;
6756 		case 124: /* UVD */
6757 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6758 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6759 			break;
6760 		case 146:
6761 		case 147:
6762 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6763 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6764 			/* reset addr and status */
6765 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6766 			if (addr == 0x0 && status == 0x0)
6767 				break;
6768 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6769 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6770 				addr);
6771 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6772 				status);
6773 			si_vm_decode_fault(rdev, status, addr);
6774 			break;
6775 		case 176: /* RINGID0 CP_INT */
6776 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6777 			break;
6778 		case 177: /* RINGID1 CP_INT */
6779 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6780 			break;
6781 		case 178: /* RINGID2 CP_INT */
6782 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6783 			break;
6784 		case 181: /* CP EOP event */
6785 			DRM_DEBUG("IH: CP EOP\n");
6786 			switch (ring_id) {
6787 			case 0:
6788 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6789 				break;
6790 			case 1:
6791 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6792 				break;
6793 			case 2:
6794 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6795 				break;
6796 			}
6797 			break;
6798 		case 224: /* DMA trap event */
6799 			DRM_DEBUG("IH: DMA trap\n");
6800 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6801 			break;
6802 		case 230: /* thermal low to high */
6803 			DRM_DEBUG("IH: thermal low to high\n");
6804 			rdev->pm.dpm.thermal.high_to_low = false;
6805 			queue_thermal = true;
6806 			break;
6807 		case 231: /* thermal high to low */
6808 			DRM_DEBUG("IH: thermal high to low\n");
6809 			rdev->pm.dpm.thermal.high_to_low = true;
6810 			queue_thermal = true;
6811 			break;
6812 		case 233: /* GUI IDLE */
6813 			DRM_DEBUG("IH: GUI idle\n");
6814 			break;
6815 		case 244: /* DMA trap event */
6816 			DRM_DEBUG("IH: DMA1 trap\n");
6817 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6818 			break;
6819 		default:
6820 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6821 			break;
6822 		}
6823 
		/* wptr/rptr are in bytes; each IH ring entry is 16 bytes (four dwords) */
		rptr += 16;
6826 		rptr &= rdev->ih.ptr_mask;
6827 		WREG32(IH_RB_RPTR, rptr);
6828 	}
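	/* dispatch the deferred DP/hotplug/thermal work outside the IH loop */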
6829 	if (queue_dp)
6830 		schedule_work(&rdev->dp_work);
6831 	if (queue_hotplug)
6832 		taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
6833 	if (queue_thermal && rdev->pm.dpm_enabled)
6834 		taskqueue_enqueue(rdev->tq, &rdev->pm.dpm.thermal.work);
6835 	rdev->ih.rptr = rptr;
6836 	atomic_set(&rdev->ih.lock, 0);
6837 
6838 	/* make sure wptr hasn't changed while processing */
6839 	wptr = si_get_ih_wptr(rdev);
6840 	if (wptr != rptr)
6841 		goto restart_ih;
6842 
6843 	return IRQ_HANDLED;
6844 }
6845 
6846 /*
6847  * startup/shutdown callbacks
6848  */
6849 static void si_uvd_init(struct radeon_device *rdev)
6850 {
6851 	int r;
6852 
6853 	if (!rdev->has_uvd)
6854 		return;
6855 
6856 	r = radeon_uvd_init(rdev);
6857 	if (r) {
6858 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
		/*
		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
		 * uvd_v2_2_resume() fail early, so nothing happens there.
		 * It is pointless to go through that code, hence we
		 * disable UVD here.
		 */
6865 		rdev->has_uvd = 0;
6866 		return;
6867 	}
6868 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6869 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6870 }
6871 
6872 static void si_uvd_start(struct radeon_device *rdev)
6873 {
6874 	int r;
6875 
6876 	if (!rdev->has_uvd)
6877 		return;
6878 
6879 	r = uvd_v2_2_resume(rdev);
6880 	if (r) {
6881 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6882 		goto error;
6883 	}
6884 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6885 	if (r) {
6886 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6887 		goto error;
6888 	}
6889 	return;
6890 
6891 error:
6892 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6893 }
6894 
6895 static void si_uvd_resume(struct radeon_device *rdev)
6896 {
6897 	struct radeon_ring *ring;
6898 	int r;
6899 
6900 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6901 		return;
6902 
6903 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6904 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, RADEON_CP_PACKET2);
6905 	if (r) {
6906 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6907 		return;
6908 	}
6909 	r = uvd_v1_0_init(rdev);
6910 	if (r) {
6911 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6912 		return;
6913 	}
6914 }
6915 
6916 static void si_vce_init(struct radeon_device *rdev)
6917 {
6918 	int r;
6919 
6920 	if (!rdev->has_vce)
6921 		return;
6922 
6923 	r = radeon_vce_init(rdev);
6924 	if (r) {
6925 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
		/*
		 * At this point rdev->vce.vcpu_bo is NULL, which makes
		 * si_vce_start() fail early, so nothing happens there.
		 * It is pointless to go through that code, hence we
		 * disable VCE here.
		 */
6932 		rdev->has_vce = 0;
6933 		return;
6934 	}
6935 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6936 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6937 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6938 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6939 }
6940 
6941 static void si_vce_start(struct radeon_device *rdev)
6942 {
6943 	int r;
6944 
6945 	if (!rdev->has_vce)
6946 		return;
6947 
6948 	r = radeon_vce_resume(rdev);
6949 	if (r) {
6950 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6951 		goto error;
6952 	}
6953 	r = vce_v1_0_resume(rdev);
6954 	if (r) {
6955 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6956 		goto error;
6957 	}
6958 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6959 	if (r) {
6960 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6961 		goto error;
6962 	}
6963 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6964 	if (r) {
6965 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6966 		goto error;
6967 	}
6968 	return;
6969 
6970 error:
6971 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6972 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6973 }
6974 
6975 static void si_vce_resume(struct radeon_device *rdev)
6976 {
6977 	struct radeon_ring *ring;
6978 	int r;
6979 
6980 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6981 		return;
6982 
6983 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6984 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6985 	if (r) {
6986 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6987 		return;
6988 	}
6989 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6990 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6991 	if (r) {
		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
6993 		return;
6994 	}
6995 	r = vce_v1_0_init(rdev);
6996 	if (r) {
6997 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6998 		return;
6999 	}
7000 }
7001 
7002 static int si_startup(struct radeon_device *rdev)
7003 {
7004 	struct radeon_ring *ring;
7005 	int r;
7006 
7007 	/* enable pcie gen2/3 link */
7008 	si_pcie_gen3_enable(rdev);
7009 	/* enable aspm */
7010 	si_program_aspm(rdev);
7011 
7012 	/* scratch needs to be initialized before MC */
7013 	r = r600_vram_scratch_init(rdev);
7014 	if (r)
7015 		return r;
7016 
7017 	si_mc_program(rdev);
7018 
7019 	if (!rdev->pm.dpm_enabled) {
7020 		r = si_mc_load_microcode(rdev);
7021 		if (r) {
7022 			DRM_ERROR("Failed to load MC firmware!\n");
7023 			return r;
7024 		}
7025 	}
7026 
7027 	r = si_pcie_gart_enable(rdev);
7028 	if (r)
7029 		return r;
7030 	si_gpu_init(rdev);
7031 
7032 	/* allocate rlc buffers */
7033 	if (rdev->family == CHIP_VERDE) {
7034 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
7035 		rdev->rlc.reg_list_size =
7036 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
7037 	}
7038 	rdev->rlc.cs_data = si_cs_data;
7039 	r = sumo_rlc_init(rdev);
7040 	if (r) {
7041 		DRM_ERROR("Failed to init rlc BOs!\n");
7042 		return r;
7043 	}
7044 
7045 	/* allocate wb buffer */
7046 	r = radeon_wb_init(rdev);
7047 	if (r)
7048 		return r;
7049 
7050 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7051 	if (r) {
7052 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7053 		return r;
7054 	}
7055 
7056 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7057 	if (r) {
7058 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7059 		return r;
7060 	}
7061 
7062 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7063 	if (r) {
7064 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7065 		return r;
7066 	}
7067 
7068 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7069 	if (r) {
7070 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7071 		return r;
7072 	}
7073 
7074 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7075 	if (r) {
7076 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7077 		return r;
7078 	}
7079 
7080 	si_uvd_start(rdev);
7081 	si_vce_start(rdev);
7082 
7083 	/* Enable IRQ */
7084 	if (!rdev->irq.installed) {
7085 		r = radeon_irq_kms_init(rdev);
7086 		if (r)
7087 			return r;
7088 	}
7089 
7090 	r = si_irq_init(rdev);
7091 	if (r) {
7092 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7093 		radeon_irq_kms_fini(rdev);
7094 		return r;
7095 	}
7096 	si_irq_set(rdev);
7097 
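	/* bring up the CP rings (GFX, CP1, CP2) and both DMA rings */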
7098 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7099 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7100 			     RADEON_CP_PACKET2);
7101 	if (r)
7102 		return r;
7103 
7104 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7105 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7106 			     RADEON_CP_PACKET2);
7107 	if (r)
7108 		return r;
7109 
7110 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7111 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7112 			     RADEON_CP_PACKET2);
7113 	if (r)
7114 		return r;
7115 
7116 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7117 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7118 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7119 	if (r)
7120 		return r;
7121 
7122 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7123 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7124 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7125 	if (r)
7126 		return r;
7127 
7128 	r = si_cp_load_microcode(rdev);
7129 	if (r)
7130 		return r;
7131 	r = si_cp_resume(rdev);
7132 	if (r)
7133 		return r;
7134 
7135 	r = cayman_dma_resume(rdev);
7136 	if (r)
7137 		return r;
7138 
7139 	si_uvd_resume(rdev);
7140 	si_vce_resume(rdev);
7141 
7142 	r = radeon_ib_pool_init(rdev);
7143 	if (r) {
7144 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7145 		return r;
7146 	}
7147 
7148 	r = radeon_vm_manager_init(rdev);
7149 	if (r) {
7150 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7151 		return r;
7152 	}
7153 
7154 	r = radeon_audio_init(rdev);
7155 	if (r)
7156 		return r;
7157 
7158 	return 0;
7159 }
7160 
7161 int si_resume(struct radeon_device *rdev)
7162 {
7163 	int r;
7164 
	/* Do not reset the GPU before posting; unlike r500 hardware,
	 * posting on rv770 and newer performs the tasks necessary to
	 * bring the GPU back into good shape.
	 */
7169 	/* post card */
7170 	atom_asic_init(rdev->mode_info.atom_context);
7171 
7172 	/* init golden registers */
7173 	si_init_golden_registers(rdev);
7174 
7175 	if (rdev->pm.pm_method == PM_METHOD_DPM)
7176 		radeon_pm_resume(rdev);
7177 
7178 	rdev->accel_working = true;
7179 	r = si_startup(rdev);
7180 	if (r) {
7181 		DRM_ERROR("si startup failed on resume\n");
7182 		rdev->accel_working = false;
7183 		return r;
7184 	}
7185 
	return r;
}
7189 
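/* suspend tears the device down in roughly the reverse order of si_startup() */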
7190 int si_suspend(struct radeon_device *rdev)
7191 {
7192 	radeon_pm_suspend(rdev);
7193 	radeon_audio_fini(rdev);
7194 	radeon_vm_manager_fini(rdev);
7195 	si_cp_enable(rdev, false);
7196 	cayman_dma_stop(rdev);
7197 	if (rdev->has_uvd) {
7198 		uvd_v1_0_fini(rdev);
7199 		radeon_uvd_suspend(rdev);
7200 	}
7201 	if (rdev->has_vce)
7202 		radeon_vce_suspend(rdev);
7203 	si_fini_pg(rdev);
7204 	si_fini_cg(rdev);
7205 	si_irq_suspend(rdev);
7206 	radeon_wb_disable(rdev);
7207 	si_pcie_gart_disable(rdev);
7208 	return 0;
7209 }
7210 
/* The plan is to move initialization into this function and to use
 * helper functions so that radeon_device_init does little more than
 * call the ASIC-specific functions. This should also allow us to
 * remove a bunch of callback functions like vram_info.
 */
7217 int si_init(struct radeon_device *rdev)
7218 {
7219 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7220 	int r;
7221 
7222 	/* Read BIOS */
7223 	if (!radeon_get_bios(rdev)) {
7224 		if (ASIC_IS_AVIVO(rdev))
7225 			return -EINVAL;
7226 	}
7227 	/* Must be an ATOMBIOS */
7228 	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
7230 		return -EINVAL;
7231 	}
7232 	r = radeon_atombios_init(rdev);
7233 	if (r)
7234 		return r;
7235 
7236 	/* Post card if necessary */
7237 	if (!radeon_card_posted(rdev)) {
7238 		if (!rdev->bios) {
7239 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7240 			return -EINVAL;
7241 		}
7242 		DRM_INFO("GPU not posted. posting now...\n");
7243 		atom_asic_init(rdev->mode_info.atom_context);
7244 	}
7245 	/* init golden registers */
7246 	si_init_golden_registers(rdev);
7247 	/* Initialize scratch registers */
7248 	si_scratch_init(rdev);
7249 	/* Initialize surface registers */
7250 	radeon_surface_init(rdev);
7251 	/* Initialize clocks */
7252 	radeon_get_clock_info(rdev->ddev);
7253 
7254 	/* Fence driver */
7255 	r = radeon_fence_driver_init(rdev);
7256 	if (r)
7257 		return r;
7258 
7259 	/* initialize memory controller */
7260 	r = si_mc_init(rdev);
7261 	if (r)
7262 		return r;
7263 	/* Memory manager */
7264 	r = radeon_bo_init(rdev);
7265 	if (r)
7266 		return r;
7267 
7268 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7269 	    !rdev->rlc_fw || !rdev->mc_fw) {
7270 		r = si_init_microcode(rdev);
7271 		if (r) {
7272 			DRM_ERROR("Failed to load firmware!\n");
7273 			return r;
7274 		}
7275 	}
7276 
7277 	/* Initialize power management */
7278 	radeon_pm_init(rdev);
7279 
7280 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7281 	ring->ring_obj = NULL;
7282 	r600_ring_init(rdev, ring, 1024 * 1024);
7283 
7284 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7285 	ring->ring_obj = NULL;
7286 	r600_ring_init(rdev, ring, 1024 * 1024);
7287 
7288 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7289 	ring->ring_obj = NULL;
7290 	r600_ring_init(rdev, ring, 1024 * 1024);
7291 
7292 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7293 	ring->ring_obj = NULL;
7294 	r600_ring_init(rdev, ring, 64 * 1024);
7295 
7296 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7297 	ring->ring_obj = NULL;
7298 	r600_ring_init(rdev, ring, 64 * 1024);
7299 
7300 	si_uvd_init(rdev);
7301 	si_vce_init(rdev);
7302 
7303 	rdev->ih.ring_obj = NULL;
7304 	r600_ih_ring_init(rdev, 64 * 1024);
7305 
7306 	r = r600_pcie_gart_init(rdev);
7307 	if (r)
7308 		return r;
7309 
7310 	rdev->accel_working = true;
7311 	r = si_startup(rdev);
7312 	if (r) {
7313 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7314 		si_cp_fini(rdev);
7315 		cayman_dma_fini(rdev);
7316 		si_irq_fini(rdev);
7317 		sumo_rlc_fini(rdev);
7318 		radeon_wb_fini(rdev);
7319 		radeon_ib_pool_fini(rdev);
7320 		radeon_vm_manager_fini(rdev);
7321 		radeon_irq_kms_fini(rdev);
7322 		si_pcie_gart_fini(rdev);
7323 		rdev->accel_working = false;
7324 	}
7325 
7326 	/* Don't start up if the MC ucode is missing.
7327 	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
7329 	 */
7330 	if (!rdev->mc_fw) {
7331 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7332 		return -EINVAL;
7333 	}
7334 
7335 	return 0;
7336 }
7337 
7338 void si_fini(struct radeon_device *rdev)
7339 {
7340 	radeon_pm_fini(rdev);
7341 	si_cp_fini(rdev);
7342 	cayman_dma_fini(rdev);
7343 	si_fini_pg(rdev);
7344 	si_fini_cg(rdev);
7345 	si_irq_fini(rdev);
7346 	sumo_rlc_fini(rdev);
7347 	radeon_wb_fini(rdev);
7348 	radeon_vm_manager_fini(rdev);
7349 	radeon_ib_pool_fini(rdev);
7350 	radeon_irq_kms_fini(rdev);
7351 	if (rdev->has_uvd) {
7352 		uvd_v1_0_fini(rdev);
7353 		radeon_uvd_fini(rdev);
7354 	}
7355 	if (rdev->has_vce)
7356 		radeon_vce_fini(rdev);
7357 	si_pcie_gart_fini(rdev);
7358 	r600_vram_scratch_fini(rdev);
7359 	radeon_gem_fini(rdev);
7360 	radeon_fence_driver_fini(rdev);
7361 	radeon_bo_fini(rdev);
7362 	radeon_atombios_fini(rdev);
7363 	si_fini_microcode(rdev);
7364 	kfree(rdev->bios);
7365 	rdev->bios = NULL;
7366 }
7367 
7368 /**
7369  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7370  *
7371  * @rdev: radeon_device pointer
7372  *
7373  * Fetches a GPU clock counter snapshot (SI).
7374  * Returns the 64 bit clock counter snapshot.
7375  */
7376 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7377 {
7378 	uint64_t clock;
7379 
7380 	mutex_lock(&rdev->gpu_clock_mutex);
7381 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7382 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7383 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7384 	mutex_unlock(&rdev->gpu_clock_mutex);
7385 	return clock;
7386 }
7387 
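/**
 * si_set_uvd_clocks - program the UPLL for the requested UVD clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested vclk (0 leaves the PLL in bypass mode)
 * @dclk: requested dclk (0 leaves the PLL in bypass mode)
 *
 * Bypasses VCLK/DCLK with bclk, reprograms the UPLL dividers and
 * switches back to the PLL output once it has settled (SI).
 * Returns 0 on success, negative error code on failure.
 */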
7388 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7389 {
7390 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7391 	int r;
7392 
7393 	/* bypass vclk and dclk with bclk */
7394 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7395 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7396 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7397 
7398 	/* put PLL in bypass mode */
7399 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7400 
7401 	if (!vclk || !dclk) {
7402 		/* keep the Bypass mode */
7403 		return 0;
7404 	}
7405 
7406 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7407 					  16384, 0x03FFFFFF, 0, 128, 5,
7408 					  &fb_div, &vclk_div, &dclk_div);
7409 	if (r)
7410 		return r;
7411 
7412 	/* set RESET_ANTI_MUX to 0 */
7413 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7414 
7415 	/* set VCO_MODE to 1 */
7416 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7417 
7418 	/* disable sleep mode */
7419 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7420 
7421 	/* deassert UPLL_RESET */
7422 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7423 
7424 	mdelay(1);
7425 
7426 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7427 	if (r)
7428 		return r;
7429 
7430 	/* assert UPLL_RESET again */
7431 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7432 
7433 	/* disable spread spectrum. */
7434 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7435 
7436 	/* set feedback divider */
7437 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7438 
7439 	/* set ref divider to 0 */
7440 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7441 
7442 	if (fb_div < 307200)
7443 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7444 	else
7445 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7446 
7447 	/* set PDIV_A and PDIV_B */
7448 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7449 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7450 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7451 
7452 	/* give the PLL some time to settle */
7453 	mdelay(15);
7454 
7455 	/* deassert PLL_RESET */
7456 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7457 
7458 	mdelay(15);
7459 
7460 	/* switch from bypass mode to normal mode */
7461 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7462 
7463 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7464 	if (r)
7465 		return r;
7466 
7467 	/* switch VCLK and DCLK selection */
7468 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7469 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7470 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7471 
7472 	mdelay(100);
7473 
7474 	return 0;
7475 }
7476 
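/**
 * si_pcie_gen3_enable - enable PCIE gen 2/3 link speeds
 *
 * @rdev: radeon_device pointer
 *
 * Brings the link up to gen2 or gen3 speed when both the GPU and the
 * upstream bridge support it, redoing link equalization for the gen3
 * transition. Controlled by the radeon.pcie_gen2 module parameter and
 * skipped on IGP and non-PCIE parts (SI).
 */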
7477 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7478 {
7479 	struct pci_dev *root = rdev->pdev->bus->self;
7480 	int bridge_pos, gpu_pos;
7481 	u32 speed_cntl, mask, current_data_rate;
7482 	int ret, i;
7483 	u16 tmp16;
7484 
7485 	if (radeon_pcie_gen2 == 0)
7486 		return;
7487 
7488 	if (rdev->flags & RADEON_IS_IGP)
7489 		return;
7490 
7491 	if (!(rdev->flags & RADEON_IS_PCIE))
7492 		return;
7493 
7494 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7495 	if (ret != 0)
7496 		return;
7497 
7498 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7499 		return;
7500 
7501 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7502 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7503 		LC_CURRENT_DATA_RATE_SHIFT;
7504 	if (mask & DRM_PCIE_SPEED_80) {
7505 		if (current_data_rate == 2) {
7506 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7507 			return;
7508 		}
7509 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7510 	} else if (mask & DRM_PCIE_SPEED_50) {
7511 		if (current_data_rate == 1) {
7512 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7513 			return;
7514 		}
7515 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7516 	}
7517 
7518 	bridge_pos = pci_get_pciecap_ptr(root->dev.bsddev);
7519 	if (!bridge_pos)
7520 		return;
7521 
7522 	gpu_pos = pci_get_pciecap_ptr(rdev->pdev->dev.bsddev);
7523 	if (!gpu_pos)
7524 		return;
7525 
7526 	if (mask & DRM_PCIE_SPEED_80) {
7527 		/* re-try equalization if gen3 is not already enabled */
7528 		if (current_data_rate != 2) {
7529 			u16 bridge_cfg, gpu_cfg;
7530 			u16 bridge_cfg2, gpu_cfg2;
7531 			u32 max_lw, current_lw, tmp;
7532 
7533 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7534 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7535 
7536 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7537 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7538 
7539 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7540 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7541 
7542 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7543 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7544 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7545 
7546 			if (current_lw < max_lw) {
7547 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7548 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7549 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7550 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7551 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7552 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7553 				}
7554 			}
7555 
7556 			for (i = 0; i < 10; i++) {
7557 				/* check status */
7558 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7559 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7560 					break;
7561 
7562 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7563 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7564 
7565 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7566 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7567 
7568 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7569 				tmp |= LC_SET_QUIESCE;
7570 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7571 
7572 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7573 				tmp |= LC_REDO_EQ;
7574 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7575 
7576 				mdelay(100);
7577 
7578 				/* linkctl */
7579 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7580 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7581 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7582 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7583 
7584 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7585 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7586 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7587 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7588 
7589 				/* linkctl2 */
7590 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7591 				tmp16 &= ~((1 << 4) | (7 << 9));
7592 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7593 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7594 
7595 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7596 				tmp16 &= ~((1 << 4) | (7 << 9));
7597 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7598 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7599 
7600 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7601 				tmp &= ~LC_SET_QUIESCE;
7602 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7603 			}
7604 		}
7605 	}
7606 
7607 	/* set the link speed */
7608 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7609 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7610 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7611 
7612 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7613 	tmp16 &= ~0xf;
7614 	if (mask & DRM_PCIE_SPEED_80)
7615 		tmp16 |= 3; /* gen3 */
7616 	else if (mask & DRM_PCIE_SPEED_50)
7617 		tmp16 |= 2; /* gen2 */
7618 	else
7619 		tmp16 |= 1; /* gen1 */
7620 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7621 
7622 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7623 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7624 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7625 
7626 	for (i = 0; i < rdev->usec_timeout; i++) {
7627 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7628 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7629 			break;
7630 		udelay(1);
7631 	}
7632 }
7633 
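/**
 * si_program_aspm - program Active State Power Management
 *
 * @rdev: radeon_device pointer
 *
 * Configures the L0s/L1 inactivity timers and the PLL power-down
 * behaviour in L1 for the PCIE PHYs. Skipped when the radeon.aspm
 * module parameter is 0 or the device is not PCIE (SI).
 */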
7634 static void si_program_aspm(struct radeon_device *rdev)
7635 {
7636 	u32 data, orig;
7637 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7638 	bool disable_clkreq = false;
7639 
7640 	if (radeon_aspm == 0)
7641 		return;
7642 
7643 	if (!(rdev->flags & RADEON_IS_PCIE))
7644 		return;
7645 
7646 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7647 	data &= ~LC_XMIT_N_FTS_MASK;
7648 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7649 	if (orig != data)
7650 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7651 
7652 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7653 	data |= LC_GO_TO_RECOVERY;
7654 	if (orig != data)
7655 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7656 
7657 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7658 	data |= P_IGNORE_EDB_ERR;
7659 	if (orig != data)
7660 		WREG32_PCIE(PCIE_P_CNTL, data);
7661 
7662 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7663 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7664 	data |= LC_PMI_TO_L1_DIS;
7665 	if (!disable_l0s)
7666 		data |= LC_L0S_INACTIVITY(7);
7667 
7668 	if (!disable_l1) {
7669 		data |= LC_L1_INACTIVITY(7);
7670 		data &= ~LC_PMI_TO_L1_DIS;
7671 		if (orig != data)
7672 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7673 
7674 		if (!disable_plloff_in_l1) {
7675 			bool clk_req_support;
7676 
7677 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7678 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7679 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7680 			if (orig != data)
7681 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7682 
7683 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7684 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7685 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7686 			if (orig != data)
7687 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7688 
7689 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7690 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7691 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7692 			if (orig != data)
7693 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7694 
7695 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7696 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7697 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7698 			if (orig != data)
7699 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7700 
7701 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7702 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7703 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7704 				if (orig != data)
7705 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7706 
7707 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7708 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7709 				if (orig != data)
7710 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7711 
7712 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7713 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7714 				if (orig != data)
7715 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7716 
7717 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7718 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7719 				if (orig != data)
7720 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7721 
7722 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7723 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7724 				if (orig != data)
7725 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7726 
7727 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7728 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7729 				if (orig != data)
7730 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7731 
7732 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7733 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7734 				if (orig != data)
7735 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7736 
7737 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7738 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7739 				if (orig != data)
7740 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7741 			}
7742 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7743 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7744 			data |= LC_DYN_LANES_PWR_STATE(3);
7745 			if (orig != data)
7746 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7747 
7748 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7749 			data &= ~LS2_EXIT_TIME_MASK;
7750 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7751 				data |= LS2_EXIT_TIME(5);
7752 			if (orig != data)
7753 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7754 
7755 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7756 			data &= ~LS2_EXIT_TIME_MASK;
7757 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7758 				data |= LS2_EXIT_TIME(5);
7759 			if (orig != data)
7760 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7761 
7762 			if (!disable_clkreq) {
7763 #ifdef zMN_TODO
7764 				struct pci_dev *root = rdev->pdev->bus->self;
7765 				u32 lnkcap;
7766 
7767 				clk_req_support = false;
7768 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7769 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7770 					clk_req_support = true;
7771 #else
7772 				clk_req_support = false;
7773 #endif
7774 			} else {
7775 				clk_req_support = false;
7776 			}
7777 
7778 			if (clk_req_support) {
7779 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7780 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7781 				if (orig != data)
7782 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7783 
7784 				orig = data = RREG32(THM_CLK_CNTL);
7785 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7786 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7787 				if (orig != data)
7788 					WREG32(THM_CLK_CNTL, data);
7789 
7790 				orig = data = RREG32(MISC_CLK_CNTL);
7791 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7792 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7793 				if (orig != data)
7794 					WREG32(MISC_CLK_CNTL, data);
7795 
7796 				orig = data = RREG32(CG_CLKPIN_CNTL);
7797 				data &= ~BCLK_AS_XCLK;
7798 				if (orig != data)
7799 					WREG32(CG_CLKPIN_CNTL, data);
7800 
7801 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7802 				data &= ~FORCE_BIF_REFCLK_EN;
7803 				if (orig != data)
7804 					WREG32(CG_CLKPIN_CNTL_2, data);
7805 
7806 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7807 				data &= ~MPLL_CLKOUT_SEL_MASK;
7808 				data |= MPLL_CLKOUT_SEL(4);
7809 				if (orig != data)
7810 					WREG32(MPLL_BYPASSCLK_SEL, data);
7811 
7812 				orig = data = RREG32(SPLL_CNTL_MODE);
7813 				data &= ~SPLL_REFCLK_SEL_MASK;
7814 				if (orig != data)
		case 244: /* DMA1 trap event */
7816 			}
7817 		}
7818 	} else {
7819 		if (orig != data)
7820 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7821 	}
7822 
7823 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7824 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7825 	if (orig != data)
7826 		WREG32_PCIE(PCIE_CNTL2, data);
7827 
7828 	if (!disable_l0s) {
7829 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7831 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7832 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7833 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7834 				data &= ~LC_L0S_INACTIVITY_MASK;
7835 				if (orig != data)
7836 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7837 			}
7838 		}
7839 	}
7840 }
7841 
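/**
 * si_vce_send_vcepll_ctlreq - handshake a VCEPLL parameter update
 *
 * @rdev: radeon_device pointer
 *
 * Asserts VCEPLL_CTLREQ and waits for the CTLACK/CTLACK2 bits to
 * acknowledge the new PLL settings (SI).
 * Returns 0 on success, -ETIMEDOUT if no acknowledgement arrives.
 */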
7842 int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
{
	unsigned i;

	/* make sure VCEPLL_CTLREQ is deasserted */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);

	mdelay(10);

	/* assert VCEPLL_CTLREQ */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);

	/* wait for CTLACK and CTLACK2 to get asserted */
	for (i = 0; i < 100; ++i) {
		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
			break;
		mdelay(10);
	}

	/* deassert VCEPLL_CTLREQ */
	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);

	if (i == 100) {
		DRM_ERROR("Timeout setting VCE clocks!\n");
		return -ETIMEDOUT;
	}

	return 0;
}
7872 
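/**
 * si_set_vce_clocks - program the VCEPLL for the requested VCE clocks
 *
 * @rdev: radeon_device pointer
 * @evclk: requested evclk (0 puts the PLL to sleep in bypass mode)
 * @ecclk: requested ecclk (0 puts the PLL to sleep in bypass mode)
 *
 * Mirrors si_set_uvd_clocks() for the VCE PLL: bypass evclk/ecclk,
 * reprogram the dividers, then switch back once the PLL has settled (SI).
 * Returns 0 on success, negative error code on failure.
 */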
7873 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7874 {
7875 	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7876 	int r;
7877 
7878 	/* bypass evclk and ecclk with bclk */
7879 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7880 		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7881 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7882 
7883 	/* put PLL in bypass mode */
7884 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7885 		     ~VCEPLL_BYPASS_EN_MASK);
7886 
7887 	if (!evclk || !ecclk) {
7888 		/* keep the Bypass mode, put PLL to sleep */
7889 		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7890 			     ~VCEPLL_SLEEP_MASK);
7891 		return 0;
7892 	}
7893 
7894 	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7895 					  16384, 0x03FFFFFF, 0, 128, 5,
7896 					  &fb_div, &evclk_div, &ecclk_div);
7897 	if (r)
7898 		return r;
7899 
7900 	/* set RESET_ANTI_MUX to 0 */
7901 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7902 
7903 	/* set VCO_MODE to 1 */
7904 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7905 		     ~VCEPLL_VCO_MODE_MASK);
7906 
7907 	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7908 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7909 		     ~VCEPLL_SLEEP_MASK);
7910 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7911 
7912 	/* deassert VCEPLL_RESET */
7913 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7914 
7915 	mdelay(1);
7916 
7917 	r = si_vce_send_vcepll_ctlreq(rdev);
7918 	if (r)
7919 		return r;
7920 
7921 	/* assert VCEPLL_RESET again */
7922 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7923 
7924 	/* disable spread spectrum. */
7925 	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7926 
7927 	/* set feedback divider */
7928 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7929 
7930 	/* set ref divider to 0 */
7931 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7932 
7933 	/* set PDIV_A and PDIV_B */
7934 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7935 		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7936 		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7937 
7938 	/* give the PLL some time to settle */
7939 	mdelay(15);
7940 
7941 	/* deassert PLL_RESET */
7942 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7943 
7944 	mdelay(15);
7945 
7946 	/* switch from bypass mode to normal mode */
7947 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7948 
7949 	r = si_vce_send_vcepll_ctlreq(rdev);
7950 	if (r)
7951 		return r;
7952 
	/* switch EVCLK and ECCLK selection */
7954 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7955 		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7956 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7957 
7958 	mdelay(100);
7959 
7960 	return 0;
7961 }
7962