xref: /dragonfly/sys/dev/drm/radeon/si.c (revision dead10d5)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include <uapi_drm/radeon_drm.h>
30 #include "sid.h"
31 #include "atom.h"
32 #include "si_blit_shaders.h"
33 #include "clearstate_si.h"
34 #include "radeon_ucode.h"
35 
36 
/*
 * Firmware images loaded at runtime for Southern Islands ASICs.
 * Each chip is listed twice: once with uppercase file names and once
 * with lowercase names.  NOTE(review): the lowercase sets carry no
 * mc2 image — presumably the two name sets correspond to the legacy
 * and the newer firmware packaging; confirm against the microcode
 * init code that requests these files.
 */

/* Tahiti */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");

MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
MODULE_FIRMWARE("radeon/tahiti_me.bin");
MODULE_FIRMWARE("radeon/tahiti_ce.bin");
MODULE_FIRMWARE("radeon/tahiti_mc.bin");
MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
MODULE_FIRMWARE("radeon/tahiti_smc.bin");

/* Pitcairn */
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");

MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
MODULE_FIRMWARE("radeon/pitcairn_me.bin");
MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
MODULE_FIRMWARE("radeon/pitcairn_smc.bin");

/* Verde */
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");

MODULE_FIRMWARE("radeon/verde_pfp.bin");
MODULE_FIRMWARE("radeon/verde_me.bin");
MODULE_FIRMWARE("radeon/verde_ce.bin");
MODULE_FIRMWARE("radeon/verde_mc.bin");
MODULE_FIRMWARE("radeon/verde_rlc.bin");
MODULE_FIRMWARE("radeon/verde_smc.bin");

/* Oland */
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");

MODULE_FIRMWARE("radeon/oland_pfp.bin");
MODULE_FIRMWARE("radeon/oland_me.bin");
MODULE_FIRMWARE("radeon/oland_ce.bin");
MODULE_FIRMWARE("radeon/oland_mc.bin");
MODULE_FIRMWARE("radeon/oland_rlc.bin");
MODULE_FIRMWARE("radeon/oland_smc.bin");

/* Hainan */
MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

MODULE_FIRMWARE("radeon/hainan_pfp.bin");
MODULE_FIRMWARE("radeon/hainan_me.bin");
MODULE_FIRMWARE("radeon/hainan_ce.bin");
MODULE_FIRMWARE("radeon/hainan_mc.bin");
MODULE_FIRMWARE("radeon/hainan_rlc.bin");
MODULE_FIRMWARE("radeon/hainan_smc.bin");
111 
/* Forward declarations for file-local helpers defined later in this file. */
static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);
122 
123 static const u32 verde_rlc_save_restore_register_list[] =
124 {
125 	(0x8000 << 16) | (0x98f4 >> 2),
126 	0x00000000,
127 	(0x8040 << 16) | (0x98f4 >> 2),
128 	0x00000000,
129 	(0x8000 << 16) | (0xe80 >> 2),
130 	0x00000000,
131 	(0x8040 << 16) | (0xe80 >> 2),
132 	0x00000000,
133 	(0x8000 << 16) | (0x89bc >> 2),
134 	0x00000000,
135 	(0x8040 << 16) | (0x89bc >> 2),
136 	0x00000000,
137 	(0x8000 << 16) | (0x8c1c >> 2),
138 	0x00000000,
139 	(0x8040 << 16) | (0x8c1c >> 2),
140 	0x00000000,
141 	(0x9c00 << 16) | (0x98f0 >> 2),
142 	0x00000000,
143 	(0x9c00 << 16) | (0xe7c >> 2),
144 	0x00000000,
145 	(0x8000 << 16) | (0x9148 >> 2),
146 	0x00000000,
147 	(0x8040 << 16) | (0x9148 >> 2),
148 	0x00000000,
149 	(0x9c00 << 16) | (0x9150 >> 2),
150 	0x00000000,
151 	(0x9c00 << 16) | (0x897c >> 2),
152 	0x00000000,
153 	(0x9c00 << 16) | (0x8d8c >> 2),
154 	0x00000000,
155 	(0x9c00 << 16) | (0xac54 >> 2),
156 	0X00000000,
157 	0x3,
158 	(0x9c00 << 16) | (0x98f8 >> 2),
159 	0x00000000,
160 	(0x9c00 << 16) | (0x9910 >> 2),
161 	0x00000000,
162 	(0x9c00 << 16) | (0x9914 >> 2),
163 	0x00000000,
164 	(0x9c00 << 16) | (0x9918 >> 2),
165 	0x00000000,
166 	(0x9c00 << 16) | (0x991c >> 2),
167 	0x00000000,
168 	(0x9c00 << 16) | (0x9920 >> 2),
169 	0x00000000,
170 	(0x9c00 << 16) | (0x9924 >> 2),
171 	0x00000000,
172 	(0x9c00 << 16) | (0x9928 >> 2),
173 	0x00000000,
174 	(0x9c00 << 16) | (0x992c >> 2),
175 	0x00000000,
176 	(0x9c00 << 16) | (0x9930 >> 2),
177 	0x00000000,
178 	(0x9c00 << 16) | (0x9934 >> 2),
179 	0x00000000,
180 	(0x9c00 << 16) | (0x9938 >> 2),
181 	0x00000000,
182 	(0x9c00 << 16) | (0x993c >> 2),
183 	0x00000000,
184 	(0x9c00 << 16) | (0x9940 >> 2),
185 	0x00000000,
186 	(0x9c00 << 16) | (0x9944 >> 2),
187 	0x00000000,
188 	(0x9c00 << 16) | (0x9948 >> 2),
189 	0x00000000,
190 	(0x9c00 << 16) | (0x994c >> 2),
191 	0x00000000,
192 	(0x9c00 << 16) | (0x9950 >> 2),
193 	0x00000000,
194 	(0x9c00 << 16) | (0x9954 >> 2),
195 	0x00000000,
196 	(0x9c00 << 16) | (0x9958 >> 2),
197 	0x00000000,
198 	(0x9c00 << 16) | (0x995c >> 2),
199 	0x00000000,
200 	(0x9c00 << 16) | (0x9960 >> 2),
201 	0x00000000,
202 	(0x9c00 << 16) | (0x9964 >> 2),
203 	0x00000000,
204 	(0x9c00 << 16) | (0x9968 >> 2),
205 	0x00000000,
206 	(0x9c00 << 16) | (0x996c >> 2),
207 	0x00000000,
208 	(0x9c00 << 16) | (0x9970 >> 2),
209 	0x00000000,
210 	(0x9c00 << 16) | (0x9974 >> 2),
211 	0x00000000,
212 	(0x9c00 << 16) | (0x9978 >> 2),
213 	0x00000000,
214 	(0x9c00 << 16) | (0x997c >> 2),
215 	0x00000000,
216 	(0x9c00 << 16) | (0x9980 >> 2),
217 	0x00000000,
218 	(0x9c00 << 16) | (0x9984 >> 2),
219 	0x00000000,
220 	(0x9c00 << 16) | (0x9988 >> 2),
221 	0x00000000,
222 	(0x9c00 << 16) | (0x998c >> 2),
223 	0x00000000,
224 	(0x9c00 << 16) | (0x8c00 >> 2),
225 	0x00000000,
226 	(0x9c00 << 16) | (0x8c14 >> 2),
227 	0x00000000,
228 	(0x9c00 << 16) | (0x8c04 >> 2),
229 	0x00000000,
230 	(0x9c00 << 16) | (0x8c08 >> 2),
231 	0x00000000,
232 	(0x8000 << 16) | (0x9b7c >> 2),
233 	0x00000000,
234 	(0x8040 << 16) | (0x9b7c >> 2),
235 	0x00000000,
236 	(0x8000 << 16) | (0xe84 >> 2),
237 	0x00000000,
238 	(0x8040 << 16) | (0xe84 >> 2),
239 	0x00000000,
240 	(0x8000 << 16) | (0x89c0 >> 2),
241 	0x00000000,
242 	(0x8040 << 16) | (0x89c0 >> 2),
243 	0x00000000,
244 	(0x8000 << 16) | (0x914c >> 2),
245 	0x00000000,
246 	(0x8040 << 16) | (0x914c >> 2),
247 	0x00000000,
248 	(0x8000 << 16) | (0x8c20 >> 2),
249 	0x00000000,
250 	(0x8040 << 16) | (0x8c20 >> 2),
251 	0x00000000,
252 	(0x8000 << 16) | (0x9354 >> 2),
253 	0x00000000,
254 	(0x8040 << 16) | (0x9354 >> 2),
255 	0x00000000,
256 	(0x9c00 << 16) | (0x9060 >> 2),
257 	0x00000000,
258 	(0x9c00 << 16) | (0x9364 >> 2),
259 	0x00000000,
260 	(0x9c00 << 16) | (0x9100 >> 2),
261 	0x00000000,
262 	(0x9c00 << 16) | (0x913c >> 2),
263 	0x00000000,
264 	(0x8000 << 16) | (0x90e0 >> 2),
265 	0x00000000,
266 	(0x8000 << 16) | (0x90e4 >> 2),
267 	0x00000000,
268 	(0x8000 << 16) | (0x90e8 >> 2),
269 	0x00000000,
270 	(0x8040 << 16) | (0x90e0 >> 2),
271 	0x00000000,
272 	(0x8040 << 16) | (0x90e4 >> 2),
273 	0x00000000,
274 	(0x8040 << 16) | (0x90e8 >> 2),
275 	0x00000000,
276 	(0x9c00 << 16) | (0x8bcc >> 2),
277 	0x00000000,
278 	(0x9c00 << 16) | (0x8b24 >> 2),
279 	0x00000000,
280 	(0x9c00 << 16) | (0x88c4 >> 2),
281 	0x00000000,
282 	(0x9c00 << 16) | (0x8e50 >> 2),
283 	0x00000000,
284 	(0x9c00 << 16) | (0x8c0c >> 2),
285 	0x00000000,
286 	(0x9c00 << 16) | (0x8e58 >> 2),
287 	0x00000000,
288 	(0x9c00 << 16) | (0x8e5c >> 2),
289 	0x00000000,
290 	(0x9c00 << 16) | (0x9508 >> 2),
291 	0x00000000,
292 	(0x9c00 << 16) | (0x950c >> 2),
293 	0x00000000,
294 	(0x9c00 << 16) | (0x9494 >> 2),
295 	0x00000000,
296 	(0x9c00 << 16) | (0xac0c >> 2),
297 	0x00000000,
298 	(0x9c00 << 16) | (0xac10 >> 2),
299 	0x00000000,
300 	(0x9c00 << 16) | (0xac14 >> 2),
301 	0x00000000,
302 	(0x9c00 << 16) | (0xae00 >> 2),
303 	0x00000000,
304 	(0x9c00 << 16) | (0xac08 >> 2),
305 	0x00000000,
306 	(0x9c00 << 16) | (0x88d4 >> 2),
307 	0x00000000,
308 	(0x9c00 << 16) | (0x88c8 >> 2),
309 	0x00000000,
310 	(0x9c00 << 16) | (0x88cc >> 2),
311 	0x00000000,
312 	(0x9c00 << 16) | (0x89b0 >> 2),
313 	0x00000000,
314 	(0x9c00 << 16) | (0x8b10 >> 2),
315 	0x00000000,
316 	(0x9c00 << 16) | (0x8a14 >> 2),
317 	0x00000000,
318 	(0x9c00 << 16) | (0x9830 >> 2),
319 	0x00000000,
320 	(0x9c00 << 16) | (0x9834 >> 2),
321 	0x00000000,
322 	(0x9c00 << 16) | (0x9838 >> 2),
323 	0x00000000,
324 	(0x9c00 << 16) | (0x9a10 >> 2),
325 	0x00000000,
326 	(0x8000 << 16) | (0x9870 >> 2),
327 	0x00000000,
328 	(0x8000 << 16) | (0x9874 >> 2),
329 	0x00000000,
330 	(0x8001 << 16) | (0x9870 >> 2),
331 	0x00000000,
332 	(0x8001 << 16) | (0x9874 >> 2),
333 	0x00000000,
334 	(0x8040 << 16) | (0x9870 >> 2),
335 	0x00000000,
336 	(0x8040 << 16) | (0x9874 >> 2),
337 	0x00000000,
338 	(0x8041 << 16) | (0x9870 >> 2),
339 	0x00000000,
340 	(0x8041 << 16) | (0x9874 >> 2),
341 	0x00000000,
342 	0x00000000
343 };
344 
/*
 * Golden RLC register settings for Tahiti.  Rows of three values:
 * { register offset, AND mask, OR value } — assumed from the radeon
 * golden-register convention; confirm against the init code that
 * applies this table.
 */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
354 
/*
 * Golden register fixups for Tahiti.  Rows of three values:
 * { register offset, AND mask, OR value } — assumed from the radeon
 * golden-register convention; confirm against the init code that
 * applies this table.
 */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
389 
/*
 * Second golden register table for Tahiti; same { offset, mask, value }
 * row layout (assumed — verify against the code that applies it).
 */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
394 
/*
 * Golden RLC register settings for Pitcairn.  Rows of three values:
 * { register offset, AND mask, OR value } — assumed from the radeon
 * golden-register convention; confirm against the init code that
 * applies this table.
 */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
403 
/*
 * Golden register fixups for Pitcairn.  Rows of three values:
 * { register offset, AND mask, OR value } — assumed from the radeon
 * golden-register convention; confirm against the init code that
 * applies this table.
 */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
434 
/*
 * Golden RLC register settings for Verde.  Rows of three values:
 * { register offset, AND mask, OR value } — assumed from the radeon
 * golden-register convention; confirm against the init code that
 * applies this table.
 */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
443 
/*
 * Golden register fixups for Verde.  Rows of three values:
 * { register offset, AND mask, OR value } — assumed from the radeon
 * golden-register convention; confirm against the init code that
 * applies this table.
 * NOTE(review): many rows are repeated two or three times; the
 * duplication is preserved verbatim — presumably intentional
 * (re-applied writes), but worth confirming against the upstream table.
 */
static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
499 
/*
 * Golden RLC register settings for Oland.  Rows of three values:
 * { register offset, AND mask, OR value } — assumed from the radeon
 * golden-register convention; confirm against the init code that
 * applies this table.
 */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
508 
/*
 * Golden register fixups for Oland.  Rows of three values:
 * { register offset, AND mask, OR value } — assumed from the radeon
 * golden-register convention; confirm against the init code that
 * applies this table.
 */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
539 
/*
 * Golden register fixups for Hainan.  Rows of three values:
 * { register offset, AND mask, OR value } — assumed from the radeon
 * golden-register convention; confirm against the init code that
 * applies this table.
 */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
568 
/*
 * Second golden register table for Hainan; same { offset, mask, value }
 * row layout (assumed — verify against the code that applies it).
 */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
573 
/*
 * Clockgating (MGCG/CGCG) register init sequence for Tahiti.
 * Rows of three values: { register offset, AND mask, OR value } —
 * assumed from the radeon golden-register convention; confirm against
 * the clockgating init code that applies this table.
 */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
703 
/*
 * Clockgating (MGCG/CGCG) register init sequence for Pitcairn.
 * Rows of three values: { register offset, AND mask, OR value } —
 * assumed from the radeon golden-register convention; confirm against
 * the clockgating init code that applies this table.
 */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
801 
/*
 * Clockgating (MGCG/CGCG) register init sequence for Verde.
 * Rows of three values: { register offset, AND mask, OR value } —
 * assumed from the radeon golden-register convention; confirm against
 * the clockgating init code that applies this table.
 */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
901 
/*
 * Clockgating (MGCG/CGCG) register init sequence for Oland.
 * Rows of three values: { register offset, AND mask, OR value } —
 * assumed from the radeon golden-register convention; confirm against
 * the clockgating init code that applies this table.
 */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
981 
/*
 * Hainan medium/coarse grain clock gating (MGCG/CGCG) init sequence.
 * Entries are {register offset, mask, value} triples consumed by
 * radeon_program_register_sequence().  Compared with the Oland table it
 * omits the 0x102c, 0x55e4 and 0x55e8 entries.
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1058 
1059 static u32 verde_pg_init[] =
1060 {
1061 	0x353c, 0xffffffff, 0x40000,
1062 	0x3538, 0xffffffff, 0x200010ff,
1063 	0x353c, 0xffffffff, 0x0,
1064 	0x353c, 0xffffffff, 0x0,
1065 	0x353c, 0xffffffff, 0x0,
1066 	0x353c, 0xffffffff, 0x0,
1067 	0x353c, 0xffffffff, 0x0,
1068 	0x353c, 0xffffffff, 0x7007,
1069 	0x3538, 0xffffffff, 0x300010ff,
1070 	0x353c, 0xffffffff, 0x0,
1071 	0x353c, 0xffffffff, 0x0,
1072 	0x353c, 0xffffffff, 0x0,
1073 	0x353c, 0xffffffff, 0x0,
1074 	0x353c, 0xffffffff, 0x0,
1075 	0x353c, 0xffffffff, 0x400000,
1076 	0x3538, 0xffffffff, 0x100010ff,
1077 	0x353c, 0xffffffff, 0x0,
1078 	0x353c, 0xffffffff, 0x0,
1079 	0x353c, 0xffffffff, 0x0,
1080 	0x353c, 0xffffffff, 0x0,
1081 	0x353c, 0xffffffff, 0x0,
1082 	0x353c, 0xffffffff, 0x120200,
1083 	0x3538, 0xffffffff, 0x500010ff,
1084 	0x353c, 0xffffffff, 0x0,
1085 	0x353c, 0xffffffff, 0x0,
1086 	0x353c, 0xffffffff, 0x0,
1087 	0x353c, 0xffffffff, 0x0,
1088 	0x353c, 0xffffffff, 0x0,
1089 	0x353c, 0xffffffff, 0x1e1e16,
1090 	0x3538, 0xffffffff, 0x600010ff,
1091 	0x353c, 0xffffffff, 0x0,
1092 	0x353c, 0xffffffff, 0x0,
1093 	0x353c, 0xffffffff, 0x0,
1094 	0x353c, 0xffffffff, 0x0,
1095 	0x353c, 0xffffffff, 0x0,
1096 	0x353c, 0xffffffff, 0x171f1e,
1097 	0x3538, 0xffffffff, 0x700010ff,
1098 	0x353c, 0xffffffff, 0x0,
1099 	0x353c, 0xffffffff, 0x0,
1100 	0x353c, 0xffffffff, 0x0,
1101 	0x353c, 0xffffffff, 0x0,
1102 	0x353c, 0xffffffff, 0x0,
1103 	0x353c, 0xffffffff, 0x0,
1104 	0x3538, 0xffffffff, 0x9ff,
1105 	0x3500, 0xffffffff, 0x0,
1106 	0x3504, 0xffffffff, 0x10000800,
1107 	0x3504, 0xffffffff, 0xf,
1108 	0x3504, 0xffffffff, 0xf,
1109 	0x3500, 0xffffffff, 0x4,
1110 	0x3504, 0xffffffff, 0x1000051e,
1111 	0x3504, 0xffffffff, 0xffff,
1112 	0x3504, 0xffffffff, 0xffff,
1113 	0x3500, 0xffffffff, 0x8,
1114 	0x3504, 0xffffffff, 0x80500,
1115 	0x3500, 0xffffffff, 0x12,
1116 	0x3504, 0xffffffff, 0x9050c,
1117 	0x3500, 0xffffffff, 0x1d,
1118 	0x3504, 0xffffffff, 0xb052c,
1119 	0x3500, 0xffffffff, 0x2a,
1120 	0x3504, 0xffffffff, 0x1053e,
1121 	0x3500, 0xffffffff, 0x2d,
1122 	0x3504, 0xffffffff, 0x10546,
1123 	0x3500, 0xffffffff, 0x30,
1124 	0x3504, 0xffffffff, 0xa054e,
1125 	0x3500, 0xffffffff, 0x3c,
1126 	0x3504, 0xffffffff, 0x1055f,
1127 	0x3500, 0xffffffff, 0x3f,
1128 	0x3504, 0xffffffff, 0x10567,
1129 	0x3500, 0xffffffff, 0x42,
1130 	0x3504, 0xffffffff, 0x1056f,
1131 	0x3500, 0xffffffff, 0x45,
1132 	0x3504, 0xffffffff, 0x10572,
1133 	0x3500, 0xffffffff, 0x48,
1134 	0x3504, 0xffffffff, 0x20575,
1135 	0x3500, 0xffffffff, 0x4c,
1136 	0x3504, 0xffffffff, 0x190801,
1137 	0x3500, 0xffffffff, 0x67,
1138 	0x3504, 0xffffffff, 0x1082a,
1139 	0x3500, 0xffffffff, 0x6a,
1140 	0x3504, 0xffffffff, 0x1b082d,
1141 	0x3500, 0xffffffff, 0x87,
1142 	0x3504, 0xffffffff, 0x310851,
1143 	0x3500, 0xffffffff, 0xba,
1144 	0x3504, 0xffffffff, 0x891,
1145 	0x3500, 0xffffffff, 0xbc,
1146 	0x3504, 0xffffffff, 0x893,
1147 	0x3500, 0xffffffff, 0xbe,
1148 	0x3504, 0xffffffff, 0x20895,
1149 	0x3500, 0xffffffff, 0xc2,
1150 	0x3504, 0xffffffff, 0x20899,
1151 	0x3500, 0xffffffff, 0xc6,
1152 	0x3504, 0xffffffff, 0x2089d,
1153 	0x3500, 0xffffffff, 0xca,
1154 	0x3504, 0xffffffff, 0x8a1,
1155 	0x3500, 0xffffffff, 0xcc,
1156 	0x3504, 0xffffffff, 0x8a3,
1157 	0x3500, 0xffffffff, 0xce,
1158 	0x3504, 0xffffffff, 0x308a5,
1159 	0x3500, 0xffffffff, 0xd3,
1160 	0x3504, 0xffffffff, 0x6d08cd,
1161 	0x3500, 0xffffffff, 0x142,
1162 	0x3504, 0xffffffff, 0x2000095a,
1163 	0x3504, 0xffffffff, 0x1,
1164 	0x3500, 0xffffffff, 0x144,
1165 	0x3504, 0xffffffff, 0x301f095b,
1166 	0x3500, 0xffffffff, 0x165,
1167 	0x3504, 0xffffffff, 0xc094d,
1168 	0x3500, 0xffffffff, 0x173,
1169 	0x3504, 0xffffffff, 0xf096d,
1170 	0x3500, 0xffffffff, 0x184,
1171 	0x3504, 0xffffffff, 0x15097f,
1172 	0x3500, 0xffffffff, 0x19b,
1173 	0x3504, 0xffffffff, 0xc0998,
1174 	0x3500, 0xffffffff, 0x1a9,
1175 	0x3504, 0xffffffff, 0x409a7,
1176 	0x3500, 0xffffffff, 0x1af,
1177 	0x3504, 0xffffffff, 0xcdc,
1178 	0x3500, 0xffffffff, 0x1b1,
1179 	0x3504, 0xffffffff, 0x800,
1180 	0x3508, 0xffffffff, 0x6c9b2000,
1181 	0x3510, 0xfc00, 0x2000,
1182 	0x3544, 0xffffffff, 0xfc0,
1183 	0x28d4, 0x00000100, 0x100
1184 };
1185 
/*
 * si_init_golden_registers - apply the per-family "golden" register settings
 *
 * @rdev: radeon_device pointer
 *
 * Each table is a list of {register, mask, value} triples applied by
 * radeon_program_register_sequence().  Families not listed here are left
 * untouched.
 */
static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		/* Verde additionally gets the power-gating init sequence */
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}
1254 
1255 #define PCIE_BUS_CLK                10000
1256 #define TCLK                        (PCIE_BUS_CLK / 10)
1257 
1258 /**
1259  * si_get_xclk - get the xclk
1260  *
1261  * @rdev: radeon_device pointer
1262  *
1263  * Returns the reference clock used by the gfx engine
1264  * (SI).
1265  */
1266 u32 si_get_xclk(struct radeon_device *rdev)
1267 {
1268         u32 reference_clock = rdev->clock.spll.reference_freq;
1269 	u32 tmp;
1270 
1271 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1272 	if (tmp & MUX_TCLK_TO_XCLK)
1273 		return TCLK;
1274 
1275 	tmp = RREG32(CG_CLKPIN_CNTL);
1276 	if (tmp & XTALIN_DIVIDE)
1277 		return reference_clock / 4;
1278 
1279 	return reference_clock;
1280 }
1281 
1282 /* get temperature in millidegrees */
1283 int si_get_temp(struct radeon_device *rdev)
1284 {
1285 	u32 temp;
1286 	int actual_temp = 0;
1287 
1288 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1289 		CTF_TEMP_SHIFT;
1290 
1291 	if (temp & 0x200)
1292 		actual_temp = 255;
1293 	else
1294 		actual_temp = temp & 0x1ff;
1295 
1296 	actual_temp = (actual_temp * 1000);
1297 
1298 	return actual_temp;
1299 }
1300 
1301 #define TAHITI_IO_MC_REGS_SIZE 36
1302 
/*
 * Tahiti MC_SEQ_IO_DEBUG {index, data} pairs, written before the legacy
 * (non-header) MC ucode is loaded in si_mc_load_microcode().  The five
 * per-family tables below are identical except for the final 0x9f entry.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1341 
/*
 * Pitcairn MC_SEQ_IO_DEBUG {index, data} pairs for the legacy MC ucode
 * load path; differs from the Tahiti table only in the final 0x9f entry.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1380 
/*
 * Verde MC_SEQ_IO_DEBUG {index, data} pairs for the legacy MC ucode
 * load path; differs from the Tahiti table only in the final 0x9f entry.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1419 
/*
 * Oland MC_SEQ_IO_DEBUG {index, data} pairs for the legacy MC ucode
 * load path; differs from the Tahiti table only in the final 0x9f entry.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1458 
/*
 * Hainan MC_SEQ_IO_DEBUG {index, data} pairs for the legacy MC ucode
 * load path; differs from the Tahiti table only in the final 0x9f entry.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1497 
/* ucode loading */
/*
 * si_mc_load_microcode - load the memory-controller (MC) firmware
 *
 * @rdev: radeon_device pointer
 *
 * Handles both firmware flavors: new-style images (little-endian payload
 * with a mc_firmware_header_v1_0 header that embeds its own IO-debug
 * register list) and legacy images (big-endian payload; IO-debug registers
 * come from the static per-family tables above).
 * Returns 0 on success, -EINVAL if no MC firmware is loaded.
 */
int si_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;		/* legacy image: big-endian words */
	const __le32 *new_fw_data = NULL;	/* new image: little-endian words */
	u32 running, blackout = 0;
	u32 *io_mc_regs = NULL;			/* legacy: static {index, data} table */
	const __le32 *new_io_mc_regs = NULL;	/* new: table embedded in the image */
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);
		/* io_debug entries are {index, data} pairs of 32-bit words */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			((const char *)rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		ucode_size = rdev->mc_fw->datasize / 4;

		switch (rdev->family) {
		case CHIP_TAHITI:
			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_PITCAIRN:
			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_VERDE:
		default:
			io_mc_regs = (u32 *)&verde_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_OLAND:
			io_mc_regs = (u32 *)&oland_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAINAN:
			io_mc_regs = (u32 *)&hainan_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only (re)program the MC sequencer when it is not already running */
	if (running == 0) {
		/*
		 * NOTE(review): this inner test can never be true -- we are
		 * inside the running == 0 branch -- so this blackout save and
		 * the matching restore at the end of the branch are dead code.
		 * The upstream Linux driver has the same pattern; kept as-is
		 * to stay diffable against it.
		 */
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}
		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		/* see NOTE(review) above: running is always 0 here */
		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}
1604 
1605 static int si_init_microcode(struct radeon_device *rdev)
1606 {
1607 	const char *chip_name;
1608 	const char *new_chip_name;
1609 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1610 	size_t smc_req_size, mc2_req_size;
1611 	char fw_name[30];
1612 	int err;
1613 	int new_fw = 0;
1614 
1615 	DRM_DEBUG("\n");
1616 
1617 	switch (rdev->family) {
1618 	case CHIP_TAHITI:
1619 		chip_name = "TAHITI";
1620 		new_chip_name = "tahiti";
1621 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1622 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1623 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1624 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1625 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1626 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1627 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1628 		break;
1629 	case CHIP_PITCAIRN:
1630 		chip_name = "PITCAIRN";
1631 		new_chip_name = "pitcairn";
1632 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1633 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1634 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1635 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1636 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1637 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1638 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1639 		break;
1640 	case CHIP_VERDE:
1641 		chip_name = "VERDE";
1642 		new_chip_name = "verde";
1643 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1644 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1645 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1646 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1647 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1648 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1649 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1650 		break;
1651 	case CHIP_OLAND:
1652 		chip_name = "OLAND";
1653 		new_chip_name = "oland";
1654 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1655 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1656 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1657 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1658 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1659 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1660 		break;
1661 	case CHIP_HAINAN:
1662 		chip_name = "HAINAN";
1663 		new_chip_name = "hainan";
1664 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1665 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1666 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1667 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1668 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1669 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1670 		break;
1671 	default: BUG();
1672 	}
1673 
1674 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1675 
1676 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
1677 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1678 	if (err) {
1679 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1680 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1681 		if (err)
1682 			goto out;
1683 		if (rdev->pfp_fw->datasize != pfp_req_size) {
1684 			printk(KERN_ERR
1685 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1686 			       rdev->pfp_fw->datasize, fw_name);
1687 			err = -EINVAL;
1688 			goto out;
1689 		}
1690 	} else {
1691 		err = radeon_ucode_validate(rdev->pfp_fw);
1692 		if (err) {
1693 			printk(KERN_ERR
1694 			       "si_cp: validation failed for firmware \"%s\"\n",
1695 			       fw_name);
1696 			goto out;
1697 		} else {
1698 			new_fw++;
1699 		}
1700 	}
1701 
1702 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
1703 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1704 	if (err) {
1705 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
1706 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1707 		if (err)
1708 			goto out;
1709 		if (rdev->me_fw->datasize != me_req_size) {
1710 			printk(KERN_ERR
1711 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1712 			       rdev->me_fw->datasize, fw_name);
1713 			err = -EINVAL;
1714 		}
1715 	} else {
1716 		err = radeon_ucode_validate(rdev->me_fw);
1717 		if (err) {
1718 			printk(KERN_ERR
1719 			       "si_cp: validation failed for firmware \"%s\"\n",
1720 			       fw_name);
1721 			goto out;
1722 		} else {
1723 			new_fw++;
1724 		}
1725 	}
1726 
1727 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
1728 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1729 	if (err) {
1730 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
1731 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1732 		if (err)
1733 			goto out;
1734 		if (rdev->ce_fw->datasize != ce_req_size) {
1735 			printk(KERN_ERR
1736 			       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1737 			       rdev->ce_fw->datasize, fw_name);
1738 			err = -EINVAL;
1739 		}
1740 	} else {
1741 		err = radeon_ucode_validate(rdev->ce_fw);
1742 		if (err) {
1743 			printk(KERN_ERR
1744 			       "si_cp: validation failed for firmware \"%s\"\n",
1745 			       fw_name);
1746 			goto out;
1747 		} else {
1748 			new_fw++;
1749 		}
1750 	}
1751 
1752 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
1753 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1754 	if (err) {
1755 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
1756 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1757 		if (err)
1758 			goto out;
1759 		if (rdev->rlc_fw->datasize != rlc_req_size) {
1760 			printk(KERN_ERR
1761 			       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1762 			       rdev->rlc_fw->datasize, fw_name);
1763 			err = -EINVAL;
1764 		}
1765 	} else {
1766 		err = radeon_ucode_validate(rdev->rlc_fw);
1767 		if (err) {
1768 			printk(KERN_ERR
1769 			       "si_cp: validation failed for firmware \"%s\"\n",
1770 			       fw_name);
1771 			goto out;
1772 		} else {
1773 			new_fw++;
1774 		}
1775 	}
1776 
1777 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
1778 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1779 	if (err) {
1780 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
1781 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1782 		if (err) {
1783 			ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
1784 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1785 			if (err)
1786 				goto out;
1787 		}
1788 		if ((rdev->mc_fw->datasize != mc_req_size) &&
1789 		    (rdev->mc_fw->datasize != mc2_req_size)) {
1790 			printk(KERN_ERR
1791 			       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1792 			       rdev->mc_fw->datasize, fw_name);
1793 			err = -EINVAL;
1794 		}
1795 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
1796 	} else {
1797 		err = radeon_ucode_validate(rdev->mc_fw);
1798 		if (err) {
1799 			printk(KERN_ERR
1800 			       "si_cp: validation failed for firmware \"%s\"\n",
1801 			       fw_name);
1802 			goto out;
1803 		} else {
1804 			new_fw++;
1805 		}
1806 	}
1807 
1808 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
1809 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1810 	if (err) {
1811 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
1812 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1813 		if (err) {
1814 			printk(KERN_ERR
1815 			       "smc: error loading firmware \"%s\"\n",
1816 			       fw_name);
1817 			release_firmware(rdev->smc_fw);
1818 			rdev->smc_fw = NULL;
1819 			err = 0;
1820 		} else if (rdev->smc_fw->datasize != smc_req_size) {
1821 			printk(KERN_ERR
1822 			       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1823 			       rdev->smc_fw->datasize, fw_name);
1824 			err = -EINVAL;
1825 		}
1826 	} else {
1827 		err = radeon_ucode_validate(rdev->smc_fw);
1828 		if (err) {
1829 			printk(KERN_ERR
1830 			       "si_cp: validation failed for firmware \"%s\"\n",
1831 			       fw_name);
1832 			goto out;
1833 		} else {
1834 			new_fw++;
1835 		}
1836 	}
1837 
1838 	if (new_fw == 0) {
1839 		rdev->new_fw = false;
1840 	} else if (new_fw < 6) {
1841 		printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1842 		err = -EINVAL;
1843 	} else {
1844 		rdev->new_fw = true;
1845 	}
1846 out:
1847 	if (err) {
1848 		if (err != -EINVAL)
1849 			printk(KERN_ERR
1850 			       "si_cp: Failed to load firmware \"%s\"\n",
1851 			       fw_name);
1852 		release_firmware(rdev->pfp_fw);
1853 		rdev->pfp_fw = NULL;
1854 		release_firmware(rdev->me_fw);
1855 		rdev->me_fw = NULL;
1856 		release_firmware(rdev->ce_fw);
1857 		rdev->ce_fw = NULL;
1858 		release_firmware(rdev->rlc_fw);
1859 		rdev->rlc_fw = NULL;
1860 		release_firmware(rdev->mc_fw);
1861 		rdev->mc_fw = NULL;
1862 		release_firmware(rdev->smc_fw);
1863 		rdev->smc_fw = NULL;
1864 	}
1865 	return err;
1866 }
1867 
1868 /**
1869  * si_fini_microcode - drop the firmwares image references
1870  *
1871  * @rdev: radeon_device pointer
1872  *
1873  * Drop the pfp, me, rlc, mc and ce firmware image references.
1874  * Called at driver shutdown.
1875  */
1876 static void si_fini_microcode(struct radeon_device *rdev)
1877 {
1878 	release_firmware(rdev->pfp_fw);
1879 	rdev->pfp_fw = NULL;
1880 	release_firmware(rdev->me_fw);
1881 	rdev->me_fw = NULL;
1882 	release_firmware(rdev->rlc_fw);
1883 	rdev->rlc_fw = NULL;
1884 	release_firmware(rdev->mc_fw);
1885 	rdev->mc_fw = NULL;
1886 	release_firmware(rdev->smc_fw);
1887 	rdev->smc_fw = NULL;
1888 	release_firmware(rdev->ce_fw);
1889 	rdev->ce_fw = NULL;
1890 }
1891 
1892 /* watermark setup */
/**
 * dce6_line_buffer_adjust - set up the line buffer split for a crtc
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the display controller being configured
 * @mode: the mode on @radeon_crtc (NULL/disabled means no lb needed)
 * @other_mode: the mode on the crtc sharing this line buffer, if any
 *
 * Programs DC_LB_MEMORY_SPLIT to give this crtc either half or the
 * whole of the shared line buffer, programs the matching DMIF buffer
 * allocation and waits for the hardware to acknowledge it.
 *
 * Returns the line buffer size allocated to this crtc, in pixels
 * (0 if the controller is disabled).
 */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The paritioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode) {
			tmp = 0; /* 1/2 */
			buffer_alloc = 1;
		} else {
			tmp = 2; /* whole */
			buffer_alloc = 2;
		}
	} else {
		/* crtc disabled: release the line buffer */
		tmp = 0;
		buffer_alloc = 0;
	}

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	/* program the DMIF buffers and poll until the hw latches it */
	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
1951 
1952 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1953 {
1954 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1955 
1956 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1957 	case 0:
1958 	default:
1959 		return 1;
1960 	case 1:
1961 		return 2;
1962 	case 2:
1963 		return 4;
1964 	case 3:
1965 		return 8;
1966 	case 4:
1967 		return 3;
1968 	case 5:
1969 		return 6;
1970 	case 6:
1971 		return 10;
1972 	case 7:
1973 		return 12;
1974 	case 8:
1975 		return 16;
1976 	}
1977 }
1978 
/* per-head input parameters for the DCE6 watermark calculations below */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;   /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1994 
1995 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1996 {
1997 	/* Calculate raw DRAM Bandwidth */
1998 	fixed20_12 dram_efficiency; /* 0.7 */
1999 	fixed20_12 yclk, dram_channels, bandwidth;
2000 	fixed20_12 a;
2001 
2002 	a.full = dfixed_const(1000);
2003 	yclk.full = dfixed_const(wm->yclk);
2004 	yclk.full = dfixed_div(yclk, a);
2005 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2006 	a.full = dfixed_const(10);
2007 	dram_efficiency.full = dfixed_const(7);
2008 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2009 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2010 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2011 
2012 	return dfixed_trunc(bandwidth);
2013 }
2014 
2015 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2016 {
2017 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2018 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2019 	fixed20_12 yclk, dram_channels, bandwidth;
2020 	fixed20_12 a;
2021 
2022 	a.full = dfixed_const(1000);
2023 	yclk.full = dfixed_const(wm->yclk);
2024 	yclk.full = dfixed_div(yclk, a);
2025 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2026 	a.full = dfixed_const(10);
2027 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2028 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2029 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2030 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2031 
2032 	return dfixed_trunc(bandwidth);
2033 }
2034 
2035 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2036 {
2037 	/* Calculate the display Data return Bandwidth */
2038 	fixed20_12 return_efficiency; /* 0.8 */
2039 	fixed20_12 sclk, bandwidth;
2040 	fixed20_12 a;
2041 
2042 	a.full = dfixed_const(1000);
2043 	sclk.full = dfixed_const(wm->sclk);
2044 	sclk.full = dfixed_div(sclk, a);
2045 	a.full = dfixed_const(10);
2046 	return_efficiency.full = dfixed_const(8);
2047 	return_efficiency.full = dfixed_div(return_efficiency, a);
2048 	a.full = dfixed_const(32);
2049 	bandwidth.full = dfixed_mul(a, sclk);
2050 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2051 
2052 	return dfixed_trunc(bandwidth);
2053 }
2054 
2055 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2056 {
2057 	return 32;
2058 }
2059 
2060 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2061 {
2062 	/* Calculate the DMIF Request Bandwidth */
2063 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2064 	fixed20_12 disp_clk, sclk, bandwidth;
2065 	fixed20_12 a, b1, b2;
2066 	u32 min_bandwidth;
2067 
2068 	a.full = dfixed_const(1000);
2069 	disp_clk.full = dfixed_const(wm->disp_clk);
2070 	disp_clk.full = dfixed_div(disp_clk, a);
2071 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2072 	b1.full = dfixed_mul(a, disp_clk);
2073 
2074 	a.full = dfixed_const(1000);
2075 	sclk.full = dfixed_const(wm->sclk);
2076 	sclk.full = dfixed_div(sclk, a);
2077 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2078 	b2.full = dfixed_mul(a, sclk);
2079 
2080 	a.full = dfixed_const(10);
2081 	disp_clk_request_efficiency.full = dfixed_const(8);
2082 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2083 
2084 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2085 
2086 	a.full = dfixed_const(min_bandwidth);
2087 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2088 
2089 	return dfixed_trunc(bandwidth);
2090 }
2091 
2092 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2093 {
2094 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2095 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2096 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2097 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2098 
2099 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2100 }
2101 
2102 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2103 {
2104 	/* Calculate the display mode Average Bandwidth
2105 	 * DisplayMode should contain the source and destination dimensions,
2106 	 * timing, etc.
2107 	 */
2108 	fixed20_12 bpp;
2109 	fixed20_12 line_time;
2110 	fixed20_12 src_width;
2111 	fixed20_12 bandwidth;
2112 	fixed20_12 a;
2113 
2114 	a.full = dfixed_const(1000);
2115 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2116 	line_time.full = dfixed_div(line_time, a);
2117 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2118 	src_width.full = dfixed_const(wm->src_width);
2119 	bandwidth.full = dfixed_mul(src_width, bpp);
2120 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2121 	bandwidth.full = dfixed_div(bandwidth, line_time);
2122 
2123 	return dfixed_trunc(bandwidth);
2124 }
2125 
/**
 * dce6_latency_watermark - compute the latency watermark
 *
 * @wm: watermark parameters
 *
 * Estimates the total memory latency the line buffer has to hide:
 * base MC latency, the time other heads occupy the return path, and
 * the dc pipe latency.  If the line buffer cannot be refilled within
 * the active display time, the shortfall is added on top.
 * Returns the watermark in ns (0 if no heads are active).
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling, many vtaps or interlace need 4 source lines
	 * per destination line; otherwise 2 are enough
	 */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* rate at which the dmif buffer can be drained */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* cap by what the display clock can consume per pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to refill one line's worth of line buffer */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2188 
2189 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2190 {
2191 	if (dce6_average_bandwidth(wm) <=
2192 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2193 		return true;
2194 	else
2195 		return false;
2196 };
2197 
2198 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2199 {
2200 	if (dce6_average_bandwidth(wm) <=
2201 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2202 		return true;
2203 	else
2204 		return false;
2205 };
2206 
2207 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2208 {
2209 	u32 lb_partitions = wm->lb_size / wm->src_width;
2210 	u32 line_time = wm->active_time + wm->blank_time;
2211 	u32 latency_tolerant_lines;
2212 	u32 latency_hiding;
2213 	fixed20_12 a;
2214 
2215 	a.full = dfixed_const(1);
2216 	if (wm->vsc.full > a.full)
2217 		latency_tolerant_lines = 1;
2218 	else {
2219 		if (lb_partitions <= (wm->vtaps + 1))
2220 			latency_tolerant_lines = 1;
2221 		else
2222 			latency_tolerant_lines = 2;
2223 	}
2224 
2225 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2226 
2227 	if (dce6_latency_watermark(wm) <= latency_hiding)
2228 		return true;
2229 	else
2230 		return false;
2231 }
2232 
/**
 * dce6_program_watermarks - program the display watermarks for a crtc
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the display controller being configured
 * @lb_size: line buffer size allocated to this crtc (pixels)
 * @num_heads: number of active crtcs
 *
 * Computes the latency watermarks for the high and low clock cases,
 * programs them into wm A/B of DPG_PIPE_LATENCY_CONTROL, forces the
 * display priority high when any bandwidth check fails, writes the
 * priority marks and saves line_time/wm values for DPM.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark scaled by pixel clock and
		 * horizontal scale ratio, in units of 16 pixels
		 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same calculation for the low-clock case */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2392 
/**
 * dce6_bandwidth_update - program display watermarks (DCE6)
 *
 * @rdev: radeon_device pointer
 *
 * Counts the active heads, then walks the crtcs in shared-line-buffer
 * pairs (i, i+1): adjusts the line buffer split for each crtc of the
 * pair and programs its watermarks.  No-op until the mode config is
 * initialized.
 */
void dce6_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode0 = NULL;
	struct drm_display_mode *mode1 = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	if (!rdev->mode_info.mode_config_initialized)
		return;

	radeon_update_display_priority(rdev);

	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	/* crtcs i and i+1 share a line buffer; each one's allocation
	 * depends on the other's mode
	 */
	for (i = 0; i < rdev->num_crtc; i += 2) {
		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
	}
}
2418 
2419 /*
2420  * Core functions
2421  */
2422 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2423 {
2424 	const u32 num_tile_mode_states = 32;
2425 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2426 
2427 	switch (rdev->config.si.mem_row_size_in_kb) {
2428 	case 1:
2429 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2430 		break;
2431 	case 2:
2432 	default:
2433 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2434 		break;
2435 	case 4:
2436 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2437 		break;
2438 	}
2439 
2440 	if ((rdev->family == CHIP_TAHITI) ||
2441 	    (rdev->family == CHIP_PITCAIRN)) {
2442 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2443 			switch (reg_offset) {
2444 			case 0:  /* non-AA compressed depth or any compressed stencil */
2445 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2446 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2447 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2448 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2449 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2450 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2453 				break;
2454 			case 1:  /* 2xAA/4xAA compressed depth only */
2455 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2456 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2457 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2458 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2459 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2460 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2462 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2463 				break;
2464 			case 2:  /* 8xAA compressed depth only */
2465 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2466 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2467 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2468 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2469 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2470 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2472 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2473 				break;
2474 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2475 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2476 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2477 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2478 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2479 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2480 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2482 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2483 				break;
2484 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2485 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2486 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2487 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2488 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2489 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2490 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2492 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2493 				break;
2494 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2495 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2496 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2497 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2498 						 TILE_SPLIT(split_equal_to_row_size) |
2499 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2500 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2502 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2503 				break;
2504 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2505 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2506 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2507 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2508 						 TILE_SPLIT(split_equal_to_row_size) |
2509 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2510 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2513 				break;
2514 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2515 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2517 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2518 						 TILE_SPLIT(split_equal_to_row_size) |
2519 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2520 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2523 				break;
2524 			case 8:  /* 1D and 1D Array Surfaces */
2525 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2526 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2527 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2528 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2529 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2530 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2532 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2533 				break;
2534 			case 9:  /* Displayable maps. */
2535 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2536 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2537 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2538 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2539 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2540 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2542 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2543 				break;
2544 			case 10:  /* Display 8bpp. */
2545 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2547 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2548 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2549 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2550 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2552 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2553 				break;
2554 			case 11:  /* Display 16bpp. */
2555 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2556 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2557 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2558 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2559 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2560 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2562 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2563 				break;
2564 			case 12:  /* Display 32bpp. */
2565 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2566 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2567 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2568 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2569 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2570 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2572 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2573 				break;
2574 			case 13:  /* Thin. */
2575 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2576 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2577 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2578 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2579 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2580 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2581 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2582 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2583 				break;
2584 			case 14:  /* Thin 8 bpp. */
2585 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2586 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2587 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2588 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2589 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2590 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2591 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2592 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2593 				break;
2594 			case 15:  /* Thin 16 bpp. */
2595 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2597 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2598 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2599 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2600 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2602 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2603 				break;
2604 			case 16:  /* Thin 32 bpp. */
2605 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2607 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2608 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2609 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2610 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2612 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2613 				break;
2614 			case 17:  /* Thin 64 bpp. */
2615 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2617 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2618 						 TILE_SPLIT(split_equal_to_row_size) |
2619 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2620 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2622 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2623 				break;
2624 			case 21:  /* 8 bpp PRT. */
2625 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2626 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2627 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2628 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2629 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2630 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2631 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2632 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2633 				break;
2634 			case 22:  /* 16 bpp PRT */
2635 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2636 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2637 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2638 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2639 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2640 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2642 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2643 				break;
2644 			case 23:  /* 32 bpp PRT */
2645 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2646 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2647 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2648 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2649 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2650 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2652 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2653 				break;
2654 			case 24:  /* 64 bpp PRT */
2655 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2656 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2657 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2658 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2659 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2660 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2662 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2663 				break;
2664 			case 25:  /* 128 bpp PRT */
2665 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2667 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2668 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2669 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2670 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2672 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2673 				break;
2674 			default:
2675 				gb_tile_moden = 0;
2676 				break;
2677 			}
2678 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2679 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2680 		}
2681 	} else if ((rdev->family == CHIP_VERDE) ||
2682 		   (rdev->family == CHIP_OLAND) ||
2683 		   (rdev->family == CHIP_HAINAN)) {
2684 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2685 			switch (reg_offset) {
2686 			case 0:  /* non-AA compressed depth or any compressed stencil */
2687 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2688 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2689 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2690 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2691 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2692 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2693 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2694 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2695 				break;
2696 			case 1:  /* 2xAA/4xAA compressed depth only */
2697 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2698 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2699 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2700 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2701 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2702 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2704 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2705 				break;
2706 			case 2:  /* 8xAA compressed depth only */
2707 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2709 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2710 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2711 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2712 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2713 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2714 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2715 				break;
2716 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2717 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2718 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2719 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2720 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2721 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2722 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2723 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2724 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2725 				break;
2726 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2727 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2728 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2729 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2731 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2732 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2734 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2735 				break;
2736 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2737 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2739 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740 						 TILE_SPLIT(split_equal_to_row_size) |
2741 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2742 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2743 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2744 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2745 				break;
2746 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2747 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2748 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2749 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2750 						 TILE_SPLIT(split_equal_to_row_size) |
2751 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2752 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2753 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2754 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2755 				break;
2756 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2757 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2758 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2759 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2760 						 TILE_SPLIT(split_equal_to_row_size) |
2761 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2762 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2764 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2765 				break;
2766 			case 8:  /* 1D and 1D Array Surfaces */
2767 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2768 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2769 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2770 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2771 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2772 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2774 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2775 				break;
2776 			case 9:  /* Displayable maps. */
2777 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2778 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2781 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2782 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2783 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2784 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2785 				break;
2786 			case 10:  /* Display 8bpp. */
2787 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2788 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2789 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2790 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2791 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2792 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2794 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2795 				break;
2796 			case 11:  /* Display 16bpp. */
2797 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2799 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2800 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2801 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2802 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2803 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2804 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2805 				break;
2806 			case 12:  /* Display 32bpp. */
2807 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2808 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2809 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2811 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2812 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2814 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2815 				break;
2816 			case 13:  /* Thin. */
2817 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2818 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2819 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2820 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2821 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2822 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2824 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2825 				break;
2826 			case 14:  /* Thin 8 bpp. */
2827 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2828 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2829 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2830 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2831 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2832 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2835 				break;
2836 			case 15:  /* Thin 16 bpp. */
2837 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2838 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2839 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2840 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2841 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2842 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2844 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2845 				break;
2846 			case 16:  /* Thin 32 bpp. */
2847 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2848 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2849 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2850 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2851 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2852 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2855 				break;
2856 			case 17:  /* Thin 64 bpp. */
2857 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2858 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2859 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2860 						 TILE_SPLIT(split_equal_to_row_size) |
2861 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2862 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2863 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2864 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2865 				break;
2866 			case 21:  /* 8 bpp PRT. */
2867 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2868 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2869 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2870 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2871 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2872 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2873 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2874 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2875 				break;
2876 			case 22:  /* 16 bpp PRT */
2877 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2879 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2880 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2881 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2882 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2883 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2884 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2885 				break;
2886 			case 23:  /* 32 bpp PRT */
2887 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2889 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2890 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2891 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2892 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2893 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2894 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2895 				break;
2896 			case 24:  /* 64 bpp PRT */
2897 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2899 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2900 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2901 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2902 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2903 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2904 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2905 				break;
2906 			case 25:  /* 128 bpp PRT */
2907 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2908 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2909 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2910 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2911 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2912 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2913 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2914 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2915 				break;
2916 			default:
2917 				gb_tile_moden = 0;
2918 				break;
2919 			}
2920 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2921 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2922 		}
2923 	} else
2924 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2925 }
2926 
2927 static void si_select_se_sh(struct radeon_device *rdev,
2928 			    u32 se_num, u32 sh_num)
2929 {
2930 	u32 data = INSTANCE_BROADCAST_WRITES;
2931 
2932 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2933 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2934 	else if (se_num == 0xffffffff)
2935 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2936 	else if (sh_num == 0xffffffff)
2937 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2938 	else
2939 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2940 	WREG32(GRBM_GFX_INDEX, data);
2941 }
2942 
2943 static u32 si_create_bitmask(u32 bit_width)
2944 {
2945 	u32 i, mask = 0;
2946 
2947 	for (i = 0; i < bit_width; i++) {
2948 		mask <<= 1;
2949 		mask |= 1;
2950 	}
2951 	return mask;
2952 }
2953 
2954 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2955 {
2956 	u32 data, mask;
2957 
2958 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2959 	if (data & 1)
2960 		data &= INACTIVE_CUS_MASK;
2961 	else
2962 		data = 0;
2963 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2964 
2965 	data >>= INACTIVE_CUS_SHIFT;
2966 
2967 	mask = si_create_bitmask(cu_per_sh);
2968 
2969 	return ~data & mask;
2970 }
2971 
/*
 * si_setup_spi - adjust SPI static CU thread management per SE/SH
 * @rdev: radeon device
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays per shader engine
 * @cu_per_sh: maximum number of compute units per shader array
 *
 * For each SE/SH pair, reads SPI_STATIC_THREAD_MGMT_3 and clears the bit
 * of the first active CU found, then writes the register back.
 * Presumably this reserves one CU per SH from static thread management —
 * NOTE(review): intent not documented here, confirm against SI register spec.
 * Broadcast selection is restored before returning.
 */
static void si_setup_spi(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 cu_per_sh)
{
	int i, j, k;
	u32 data, mask, active_cu;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			/* target this specific SE/SH for the register accesses below */
			si_select_se_sh(rdev, i, j);
			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
			active_cu = si_get_cu_enabled(rdev, cu_per_sh);

			mask = 1;
			for (k = 0; k < 16; k++) {
				/* NOTE(review): `mask <<= k` shifts by the loop
				 * index, so the bits actually tested are
				 * 0, 1, 3, 6, 10, 15 (cumulative shifts) rather
				 * than every bit; `mask <<= 1` looks intended,
				 * but this matches the historical upstream code
				 * and is left untouched — verify on hardware
				 * before changing. */
				mask <<= k;
				if (active_cu & mask) {
					/* clear the first active CU's bit and stop */
					data &= ~mask;
					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
					break;
				}
			}
		}
	}
	/* restore broadcast so later register writes hit all SEs/SHs */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2998 
2999 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3000 			      u32 max_rb_num_per_se,
3001 			      u32 sh_per_se)
3002 {
3003 	u32 data, mask;
3004 
3005 	data = RREG32(CC_RB_BACKEND_DISABLE);
3006 	if (data & 1)
3007 		data &= BACKEND_DISABLE_MASK;
3008 	else
3009 		data = 0;
3010 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3011 
3012 	data >>= BACKEND_DISABLE_SHIFT;
3013 
3014 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3015 
3016 	return data & mask;
3017 }
3018 
/*
 * si_setup_rb - collect RB enable state and program raster configuration
 * @rdev: radeon device
 * @se_num: number of shader engines
 * @sh_per_se: number of shader arrays per shader engine
 * @max_rb_num_per_se: maximum render backends per shader engine
 *
 * Walks every SE/SH pair to build a global disabled-RB bitmap
 * (TAHITI_RB_BITMAP_WIDTH_PER_SH bits per SH), derives the enabled-RB
 * mask from it, caches that in rdev->config.si.backend_enable_mask for
 * later use, and finally programs a per-SE PA_SC_RASTER_CONFIG RB map
 * using 2 bits of enable state per RB pair.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* gather the per-SH disabled bitmaps into one packed word */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: a RB is enabled iff it is not disabled */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.si.backend_enable_mask = enabled_rbs;

	/* program each SE's raster config from its RBs' enable bits,
	 * consuming enabled_rbs two bits at a time */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				/* only the first RB of the pair is enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB of the pair is enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both enabled (also the fallback for neither) */
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* restore broadcast for subsequent register writes */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
3068 
/*
 * si_gpu_init - bring up and configure the GFX block for SI asics
 * @rdev: radeon device
 *
 * Fills in the per-family limits in rdev->config.si, derives the tiling
 * configuration (gb_addr_config and the packed tile_config dword),
 * programs the address-config registers across the GFX/DMA/UVD blocks,
 * initializes the tiling mode table, RBs and SPI, counts active CUs,
 * and finally sets the 3D-engine hardware defaults.
 * Statement order follows the hardware bring-up sequence and must be
 * preserved.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family shader/tiling limits and golden address config */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but never used below */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the number of columns, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* propagate the address config to every block that does addressing */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);

	/* count the active CUs across all SE/SH pairs */
	rdev->config.si.active_cus = 0;
	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
			rdev->config.si.active_cus +=
				hweight32(si_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no modification: keeps the reset value */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable the CB performance counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the configuration settle before the caller continues */
	udelay(50);
}
3332 
3333 /*
3334  * GPU scratch registers helpers function.
3335  */
3336 static void si_scratch_init(struct radeon_device *rdev)
3337 {
3338 	int i;
3339 
3340 	rdev->scratch.num_reg = 7;
3341 	rdev->scratch.reg_base = SCRATCH_REG0;
3342 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3343 		rdev->scratch.free[i] = true;
3344 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3345 	}
3346 }
3347 
/*
 * si_fence_ring_emit - emit a fence on the gfx ring
 * @rdev: radeon device
 * @fence: fence to emit
 *
 * Emits a cache-flush sequence followed by an EVENT_WRITE_EOP packet
 * that writes the fence sequence number to the fence GPU address and
 * raises an interrupt.  The packet word order is fixed by the PM4
 * format and must not be rearranged.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* sync the full address range */
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, lower_32_bits(addr));
	/* DATA_SEL(1): write 32-bit seq; INT_SEL(2): interrupt after write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3374 
3375 /*
3376  * IB stuff
3377  */
/*
 * si_ring_ib_execute - emit an indirect buffer on the gfx ring
 * @rdev: radeon device
 * @ib: IB to schedule
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for const IBs)
 * packet pointing at the IB, preceded by a next_rptr update so the CPU
 * can track ring consumption, and followed by a GART read-cache flush
 * for the IB's vmid.  The next_rptr offsets (+3+4+8 / +5+4+8) count the
 * dwords emitted after the wptr snapshot and must stay in sync with the
 * packet stream below.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for this write + 4 for the IB packet + 8 for the flush */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for WRITE_DATA + 4 for the IB packet + 8 for the flush */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8)); /* write confirm */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	/* vmid goes in the top byte of the size dword */
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF); /* sync the full address range */
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3434 
3435 /*
3436  * CP.
3437  */
3438 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3439 {
3440 	if (enable)
3441 		WREG32(CP_ME_CNTL, 0);
3442 	else {
3443 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3444 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3445 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3446 		WREG32(SCRATCH_UMSK, 0);
3447 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3448 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3449 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3450 	}
3451 	udelay(50);
3452 }
3453 
3454 static int si_cp_load_microcode(struct radeon_device *rdev)
3455 {
3456 	int i;
3457 
3458 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3459 		return -EINVAL;
3460 
3461 	si_cp_enable(rdev, false);
3462 
3463 	if (rdev->new_fw) {
3464 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3465 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3466 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3467 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3468 		const struct gfx_firmware_header_v1_0 *me_hdr =
3469 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3470 		const __le32 *fw_data;
3471 		u32 fw_size;
3472 
3473 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3474 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3475 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3476 
3477 		/* PFP */
3478 		fw_data = (const __le32 *)
3479 			((const char *)rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3480 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3481 		WREG32(CP_PFP_UCODE_ADDR, 0);
3482 		for (i = 0; i < fw_size; i++)
3483 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3484 		WREG32(CP_PFP_UCODE_ADDR, 0);
3485 
3486 		/* CE */
3487 		fw_data = (const __le32 *)
3488 			((const char *)rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3489 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3490 		WREG32(CP_CE_UCODE_ADDR, 0);
3491 		for (i = 0; i < fw_size; i++)
3492 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3493 		WREG32(CP_CE_UCODE_ADDR, 0);
3494 
3495 		/* ME */
3496 		fw_data = (const __be32 *)
3497 			((const char *)rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3498 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3499 		WREG32(CP_ME_RAM_WADDR, 0);
3500 		for (i = 0; i < fw_size; i++)
3501 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3502 		WREG32(CP_ME_RAM_WADDR, 0);
3503 	} else {
3504 		const __be32 *fw_data;
3505 
3506 		/* PFP */
3507 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3508 		WREG32(CP_PFP_UCODE_ADDR, 0);
3509 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3510 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3511 		WREG32(CP_PFP_UCODE_ADDR, 0);
3512 
3513 		/* CE */
3514 		fw_data = (const __be32 *)rdev->ce_fw->data;
3515 		WREG32(CP_CE_UCODE_ADDR, 0);
3516 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3517 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3518 		WREG32(CP_CE_UCODE_ADDR, 0);
3519 
3520 		/* ME */
3521 		fw_data = (const __be32 *)rdev->me_fw->data;
3522 		WREG32(CP_ME_RAM_WADDR, 0);
3523 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3524 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3525 		WREG32(CP_ME_RAM_WADDR, 0);
3526 	}
3527 
3528 	WREG32(CP_PFP_UCODE_ADDR, 0);
3529 	WREG32(CP_CE_UCODE_ADDR, 0);
3530 	WREG32(CP_ME_RAM_WADDR, 0);
3531 	WREG32(CP_ME_RAM_RADDR, 0);
3532 	return 0;
3533 }
3534 
3535 static int si_cp_start(struct radeon_device *rdev)
3536 {
3537 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3538 	int r, i;
3539 
3540 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3541 	if (r) {
3542 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3543 		return r;
3544 	}
3545 	/* init the CP */
3546 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3547 	radeon_ring_write(ring, 0x1);
3548 	radeon_ring_write(ring, 0x0);
3549 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3550 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3551 	radeon_ring_write(ring, 0);
3552 	radeon_ring_write(ring, 0);
3553 
3554 	/* init the CE partitions */
3555 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3556 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3557 	radeon_ring_write(ring, 0xc000);
3558 	radeon_ring_write(ring, 0xe000);
3559 	radeon_ring_unlock_commit(rdev, ring, false);
3560 
3561 	si_cp_enable(rdev, true);
3562 
3563 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3564 	if (r) {
3565 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3566 		return r;
3567 	}
3568 
3569 	/* setup clear context state */
3570 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3571 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3572 
3573 	for (i = 0; i < si_default_size; i++)
3574 		radeon_ring_write(ring, si_default_state[i]);
3575 
3576 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3577 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3578 
3579 	/* set clear context state */
3580 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3581 	radeon_ring_write(ring, 0);
3582 
3583 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3584 	radeon_ring_write(ring, 0x00000316);
3585 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3586 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3587 
3588 	radeon_ring_unlock_commit(rdev, ring, false);
3589 
3590 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3591 		ring = &rdev->ring[i];
3592 		r = radeon_ring_lock(rdev, ring, 2);
3593 
3594 		/* clear the compute context state */
3595 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3596 		radeon_ring_write(ring, 0);
3597 
3598 		radeon_ring_unlock_commit(rdev, ring, false);
3599 	}
3600 
3601 	return 0;
3602 }
3603 
3604 static void si_cp_fini(struct radeon_device *rdev)
3605 {
3606 	struct radeon_ring *ring;
3607 	si_cp_enable(rdev, false);
3608 
3609 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3610 	radeon_ring_fini(rdev, ring);
3611 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3612 
3613 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3614 	radeon_ring_fini(rdev, ring);
3615 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3616 
3617 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3618 	radeon_ring_fini(rdev, ring);
3619 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3620 }
3621 
/**
 * si_cp_resume - program and bring up the three CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Programs ring 0 (gfx + compute) and rings 1/2 (compute only): buffer
 * size, read/write pointers, writeback addresses and ring base, then
 * starts the CP via si_cp_start() and ring-tests all three rings.
 * Returns 0 on success, or the error from the gfx ring test; failures
 * on the compute rings only mark those rings not ready.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	/* keep GUI idle interrupts off while the rings are reprogrammed */
	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		/* writeback disabled: stop the CP from updating rptr in memory */
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	/* NOTE(review): unlike ring 0, RB_NO_UPDATE is not applied here when
	 * writeback is disabled - presumably intentional; confirm upstream */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	/* a gfx ring test failure is fatal for all three rings */
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	/* compute ring failures are non-fatal: just mark the ring unusable */
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
3747 
/**
 * si_gpu_check_soft_reset - determine which GPU blocks appear hung
 *
 * @rdev: radeon_device pointer
 *
 * Reads the hardware status registers (GRBM, SRBM, DMA, VM L2) and
 * accumulates a mask of RADEON_RESET_* flags for every engine that
 * reports busy.  Returns 0 when the GPU looks idle.
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3828 
/**
 * si_gpu_soft_reset - soft-reset the requested GPU blocks
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of RADEON_RESET_* flags to act on (0 = no-op)
 *
 * Quiesces the chip (PG/CG off, RLC stopped, CP halted, DMA rings
 * disabled, MC stopped), asserts the corresponding GRBM/SRBM soft-reset
 * bits, deasserts them, then restores the MC.  Register state is dumped
 * before and after for debugging.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* assert, settle, then deassert; the read-backs post the writes */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3960 
/**
 * si_set_clk_bypass_mode - switch sclk/mclk to their bypass sources
 *
 * @rdev: radeon_device pointer
 *
 * Puts the SPLL into bypass, requests the clock change and polls for
 * it to take effect (up to rdev->usec_timeout us), then deselects the
 * MPLL as the mclk source.  Used before a pci config reset.
 */
static void si_set_clk_bypass_mode(struct radeon_device *rdev)
{
	u32 tmp, i;

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_BYPASS_EN;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	/* request the clock switch */
	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp |= SPLL_CTLREQ_CHG;
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	/* wait for the switch to be acknowledged */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
			break;
		udelay(1);
	}

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	tmp = RREG32(MPLL_CNTL_MODE);
	tmp &= ~MPLL_MCLK_SEL;
	WREG32(MPLL_CNTL_MODE, tmp);
}
3987 
/**
 * si_spll_powerdown - put the SPLL into reset/sleep
 *
 * @rdev: radeon_device pointer
 *
 * Takes software control of the SPLL, asserts reset and sleep, then
 * returns control to hardware.  Called after switching the clocks to
 * bypass, just before a pci config reset.
 */
static void si_spll_powerdown(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG32(SPLL_CNTL_MODE);
	tmp |= SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_RESET;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_SLEEP;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(SPLL_CNTL_MODE);
	tmp &= ~SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);
}
4008 
/**
 * si_gpu_pci_config_reset - full asic reset via pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Heavier fallback when soft reset fails: quiesces the chip, switches
 * the clocks to bypass, powers down the SPLL, disables bus mastering
 * and triggers a pci config reset, then polls CONFIG_MEMSIZE until the
 * asic comes back (reads return 0xffffffff while it is in reset).
 */
static void si_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
	/* dma0 */
	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	/* dma1 */
	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	si_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* set mclk/sclk to bypass */
	si_set_clk_bypass_mode(rdev);
	/* powerdown spll */
	si_spll_powerdown(rdev);
	/* disable BM */
	pci_disable_busmaster(rdev->pdev->dev.bsddev);
	/* reset */
	radeon_pci_config_reset(rdev);
	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}
}
4060 
4061 int si_asic_reset(struct radeon_device *rdev)
4062 {
4063 	u32 reset_mask;
4064 
4065 	reset_mask = si_gpu_check_soft_reset(rdev);
4066 
4067 	if (reset_mask)
4068 		r600_set_bios_scratch_engine_hung(rdev, true);
4069 
4070 	/* try soft reset */
4071 	si_gpu_soft_reset(rdev, reset_mask);
4072 
4073 	reset_mask = si_gpu_check_soft_reset(rdev);
4074 
4075 	/* try pci config reset */
4076 	if (reset_mask && radeon_hard_reset)
4077 		si_gpu_pci_config_reset(rdev);
4078 
4079 	reset_mask = si_gpu_check_soft_reset(rdev);
4080 
4081 	if (!reset_mask)
4082 		r600_set_bios_scratch_engine_hung(rdev, false);
4083 
4084 	return 0;
4085 }
4086 
4087 /**
4088  * si_gfx_is_lockup - Check if the GFX engine is locked up
4089  *
4090  * @rdev: radeon_device pointer
4091  * @ring: radeon_ring structure holding ring information
4092  *
4093  * Check if the GFX engine is locked up.
4094  * Returns true if the engine appears to be locked up, false if not.
4095  */
4096 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4097 {
4098 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4099 
4100 	if (!(reset_mask & (RADEON_RESET_GFX |
4101 			    RADEON_RESET_COMPUTE |
4102 			    RADEON_RESET_CP))) {
4103 		radeon_ring_lockup_update(rdev, ring);
4104 		return false;
4105 	}
4106 	return radeon_ring_test_lockup(rdev, ring);
4107 }
4108 
4109 /* MC */
/**
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Clears the HDP tiling registers, stops the MC, then programs the
 * system aperture, FB location and HDP nonsurface registers before
 * resuming the MC and disabling the VGA renderer.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the upper half, start in the lower half */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
4160 
4161 void si_vram_gtt_location(struct radeon_device *rdev,
4162 			  struct radeon_mc *mc)
4163 {
4164 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4165 		/* leave room for at least 1024M GTT */
4166 		dev_warn(rdev->dev, "limiting VRAM\n");
4167 		mc->real_vram_size = 0xFFC0000000ULL;
4168 		mc->mc_vram_size = 0xFFC0000000ULL;
4169 	}
4170 	radeon_vram_location(rdev, &rdev->mc, 0);
4171 	rdev->mc.gtt_base_align = 0;
4172 	radeon_gtt_location(rdev, mc);
4173 }
4174 
4175 static int si_mc_init(struct radeon_device *rdev)
4176 {
4177 	u32 tmp;
4178 	int chansize, numchan;
4179 
4180 	/* Get VRAM informations */
4181 	rdev->mc.vram_is_ddr = true;
4182 	tmp = RREG32(MC_ARB_RAMCFG);
4183 	if (tmp & CHANSIZE_OVERRIDE) {
4184 		chansize = 16;
4185 	} else if (tmp & CHANSIZE_MASK) {
4186 		chansize = 64;
4187 	} else {
4188 		chansize = 32;
4189 	}
4190 	tmp = RREG32(MC_SHARED_CHMAP);
4191 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4192 	case 0:
4193 	default:
4194 		numchan = 1;
4195 		break;
4196 	case 1:
4197 		numchan = 2;
4198 		break;
4199 	case 2:
4200 		numchan = 4;
4201 		break;
4202 	case 3:
4203 		numchan = 8;
4204 		break;
4205 	case 4:
4206 		numchan = 3;
4207 		break;
4208 	case 5:
4209 		numchan = 6;
4210 		break;
4211 	case 6:
4212 		numchan = 10;
4213 		break;
4214 	case 7:
4215 		numchan = 12;
4216 		break;
4217 	case 8:
4218 		numchan = 16;
4219 		break;
4220 	}
4221 	rdev->mc.vram_width = numchan * chansize;
4222 	/* Could aper size report 0 ? */
4223 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4224 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4225 	/* size in MB on si */
4226 	tmp = RREG32(CONFIG_MEMSIZE);
4227 	/* some boards may have garbage in the upper 16 bits */
4228 	if (tmp & 0xffff0000) {
4229 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4230 		if (tmp & 0xffff)
4231 			tmp &= 0xffff;
4232 	}
4233 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4234 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4235 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4236 	si_vram_gtt_location(rdev, &rdev->mc);
4237 	radeon_update_bandwidth_info(rdev);
4238 
4239 	return 0;
4240 }
4241 
4242 /*
4243  * GART
4244  */
/**
 * si_pcie_gart_tlb_flush - flush the HDP cache and VM TLBs
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache, then invalidates the TLBs of all 16 VM
 * contexts so page table updates become visible to the GPU.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
4253 
/**
 * si_pcie_gart_enable - set up and enable the GART
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART table in VRAM, programs the L1 TLB and L2 cache,
 * configures VM context 0 as the system GART mapping and contexts 1-15
 * for per-process VMs (page table bases restored from saved state),
 * then flushes the TLBs and marks the GART ready.
 * Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 fall back to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 live in two separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4341 
/**
 * si_pcie_gart_disable - disable the GART
 *
 * @rdev: radeon_device pointer
 *
 * Saves the page table base registers of VM contexts 1-15 (restored in
 * si_pcie_gart_enable()), disables all VM contexts, the L1 TLB and the
 * L2 cache, and unpins the GART table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save per-VM page table bases so a resume can restore them */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4371 
/* Tear down the GART completely: disable it, free the table, free state. */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4378 
4379 /* vm parser */
/**
 * si_vm_reg_valid - check whether a VM-mode CS may touch a register
 *
 * @reg: register byte offset
 *
 * Whitelist used by the VM command-stream checkers: all context
 * registers (>= 0x28000) are allowed, plus an explicit list of safe
 * config registers.  Anything else is rejected with an error message.
 * Returns true if the register is allowed.
 */
static bool si_vm_reg_valid(u32 reg)
{
	/* context regs are fine */
	if (reg >= 0x28000)
		return true;

	/* check config regs */
	switch (reg) {
	case GRBM_GFX_INDEX:
	case CP_STRMOUT_CNTL:
	case VGT_VTX_VECT_EJECT_REG:
	case VGT_CACHE_INVALIDATION:
	case VGT_ESGS_RING_SIZE:
	case VGT_GSVS_RING_SIZE:
	case VGT_GS_VERTEX_REUSE:
	case VGT_PRIMITIVE_TYPE:
	case VGT_INDEX_TYPE:
	case VGT_NUM_INDICES:
	case VGT_NUM_INSTANCES:
	case VGT_TF_RING_SIZE:
	case VGT_HS_OFFCHIP_PARAM:
	case VGT_TF_MEMORY_BASE:
	case PA_CL_ENHANCE:
	case PA_SU_LINE_STIPPLE_VALUE:
	case PA_SC_LINE_STIPPLE_STATE:
	case PA_SC_ENHANCE:
	case SQC_CACHES:
	case SPI_STATIC_THREAD_MGMT_1:
	case SPI_STATIC_THREAD_MGMT_2:
	case SPI_STATIC_THREAD_MGMT_3:
	case SPI_PS_MAX_WAVE_ID:
	case SPI_CONFIG_CNTL:
	case SPI_CONFIG_CNTL_1:
	case TA_CNTL_AUX:
		return true;
	default:
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
		return false;
	}
}
4420 
/**
 * si_vm_packet3_ce_check - validate a packet3 on the constant engine
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: indirect buffer dwords (unused)
 * @pkt: parsed packet header
 *
 * Only a fixed set of opcodes is permitted on the CE; the packet
 * payload itself is not inspected.  Returns 0 if the opcode is
 * allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_ce_check(struct radeon_device *rdev,
				  u32 *ib, struct radeon_cs_packet *pkt)
{
	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_SET_CE_DE_COUNTERS:
	case PACKET3_LOAD_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM:
	case PACKET3_WRITE_CONST_RAM_OFFSET:
	case PACKET3_DUMP_CONST_RAM:
	case PACKET3_INCREMENT_CE_COUNTER:
	case PACKET3_WAIT_ON_DE_COUNTER:
	case PACKET3_CE_WRITE:
		break;
	default:
		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4442 
4443 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4444 {
4445 	u32 start_reg, reg, i;
4446 	u32 command = ib[idx + 4];
4447 	u32 info = ib[idx + 1];
4448 	u32 idx_value = ib[idx];
4449 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4450 		/* src address space is register */
4451 		if (((info & 0x60000000) >> 29) == 0) {
4452 			start_reg = idx_value << 2;
4453 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4454 				reg = start_reg;
4455 				if (!si_vm_reg_valid(reg)) {
4456 					DRM_ERROR("CP DMA Bad SRC register\n");
4457 					return -EINVAL;
4458 				}
4459 			} else {
4460 				for (i = 0; i < (command & 0x1fffff); i++) {
4461 					reg = start_reg + (4 * i);
4462 					if (!si_vm_reg_valid(reg)) {
4463 						DRM_ERROR("CP DMA Bad SRC register\n");
4464 						return -EINVAL;
4465 					}
4466 				}
4467 			}
4468 		}
4469 	}
4470 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4471 		/* dst address space is register */
4472 		if (((info & 0x00300000) >> 20) == 0) {
4473 			start_reg = ib[idx + 2];
4474 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4475 				reg = start_reg;
4476 				if (!si_vm_reg_valid(reg)) {
4477 					DRM_ERROR("CP DMA Bad DST register\n");
4478 					return -EINVAL;
4479 				}
4480 			} else {
4481 				for (i = 0; i < (command & 0x1fffff); i++) {
4482 					reg = start_reg + (4 * i);
4483 				if (!si_vm_reg_valid(reg)) {
4484 						DRM_ERROR("CP DMA Bad DST register\n");
4485 						return -EINVAL;
4486 					}
4487 				}
4488 			}
4489 		}
4490 	}
4491 	return 0;
4492 }
4493 
/**
 * si_vm_packet3_gfx_check - validate a packet3 on the gfx ring
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: indirect buffer dwords
 * @pkt: parsed packet header
 *
 * Whitelists the packet3 opcodes allowed from a VM command stream and,
 * for the opcodes that can write registers (COPY_DATA, WRITE_DATA,
 * COND_WRITE, COPY_DW, SET_CONFIG_REG, CP_DMA), verifies every target
 * register against si_vm_reg_valid().
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes with no register operands to validate */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dest-select bits 0 means the destination is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-select 0 means the destination is register space */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* WR_ONE_ADDR: all dwords go to one register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* consecutive registers, one per data dword */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-space bit set means a register write */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* dst-select bit set means a register destination */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* the whole register range must stay inside the config space */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4611 
/*
 * si_vm_packet3_compute_check - validate a PACKET3 in a VM IB on a
 * compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB dwords
 * @pkt: decoded packet (idx/count/opcode filled in by the caller)
 *
 * Compute-ring counterpart of si_vm_packet3_gfx_check(): a smaller opcode
 * whitelist (no draw packets), same register validation for packets that
 * can write GPU registers.
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;	/* first dword after the packet header */
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes allowed without further inspection */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst-select field == 0: destination is a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;	/* dword offset -> byte offset */
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst-select field == 0: destination is a register */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* single-register mode */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				/* sequential mode: count - 2 payload dwords */
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* bit 8 set: write goes to register space */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit 1 set: destination is a register */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4699 
/*
 * si_ib_parse - validate every packet of a VM indirect buffer
 *
 * @rdev: radeon_device pointer
 * @ib: the IB to check
 *
 * Walks the IB dword stream packet by packet and dispatches each type-3
 * packet to the CE, GFX or compute checker depending on the target ring.
 * Type-0 packets (raw register writes) are rejected outright; type-2
 * packets are single-dword fillers and are skipped.
 * Returns 0 on success, -EINVAL on the first invalid packet.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0, i;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			/* not allowed in a VM IB; dump the whole IB with the
			 * offending dword marked to aid debugging */
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			for (i = 0; i < ib->length_dw; i++) {
				if (i == idx)
					printk("\t0x%08x <---\n", ib->ptr[i]);
				else
					printk("\t0x%08x\n", ib->ptr[i]);
			}
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* one-dword filler/NOP packet */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* header dword plus count+1 payload dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
4757 
4758 /*
4759  * vm
4760  */
4761 int si_vm_init(struct radeon_device *rdev)
4762 {
4763 	/* number of VMs */
4764 	rdev->vm_manager.nvm = 16;
4765 	/* base offset of vram pages */
4766 	rdev->vm_manager.vram_base_offset = 0;
4767 
4768 	return 0;
4769 }
4770 
/* si_vm_fini - VM teardown; intentionally empty since si_vm_init()
 * allocates no resources. */
void si_vm_fini(struct radeon_device *rdev)
{
}
4774 
4775 /**
4776  * si_vm_decode_fault - print human readable fault info
4777  *
4778  * @rdev: radeon_device pointer
4779  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4780  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4781  *
4782  * Print human readable fault information (SI).
4783  */
static void si_vm_decode_fault(struct radeon_device *rdev,
			       u32 status, u32 addr)
{
	/* unpack the fault status register fields */
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;	/* human-readable name of the faulting HW block */

	/* Tahiti uses a different memory-client-id -> block mapping than
	 * the other SI parts, hence two separate lookup tables. */
	if (rdev->family == CHIP_TAHITI) {
		switch (mc_id) {
		case 160:
		case 144:
		case 96:
		case 80:
		case 224:
		case 208:
		case 32:
		case 16:
			block = "CB";
			break;
		case 161:
		case 145:
		case 97:
		case 81:
		case 225:
		case 209:
		case 33:
		case 17:
			block = "CB_FMASK";
			break;
		case 162:
		case 146:
		case 98:
		case 82:
		case 226:
		case 210:
		case 34:
		case 18:
			block = "CB_CMASK";
			break;
		case 163:
		case 147:
		case 99:
		case 83:
		case 227:
		case 211:
		case 35:
		case 19:
			block = "CB_IMMED";
			break;
		case 164:
		case 148:
		case 100:
		case 84:
		case 228:
		case 212:
		case 36:
		case 20:
			block = "DB";
			break;
		case 165:
		case 149:
		case 101:
		case 85:
		case 229:
		case 213:
		case 37:
		case 21:
			block = "DB_HTILE";
			break;
		case 167:
		case 151:
		case 103:
		case 87:
		case 231:
		case 215:
		case 39:
		case 23:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 64:
		case 8:
		case 4:
		case 0:
		case 136:
		case 132:
		case 128:
		case 200:
		case 196:
		case 192:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
		case 190:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	} else {
		/* Pitcairn/Verde/Oland/Hainan client-id layout */
		switch (mc_id) {
		case 32:
		case 16:
		case 96:
		case 80:
		case 160:
		case 144:
		case 224:
		case 208:
			block = "CB";
			break;
		case 33:
		case 17:
		case 97:
		case 81:
		case 161:
		case 145:
		case 225:
		case 209:
			block = "CB_FMASK";
			break;
		case 34:
		case 18:
		case 98:
		case 82:
		case 162:
		case 146:
		case 226:
		case 210:
			block = "CB_CMASK";
			break;
		case 35:
		case 19:
		case 99:
		case 83:
		case 163:
		case 147:
		case 227:
		case 211:
			block = "CB_IMMED";
			break;
		case 36:
		case 20:
		case 100:
		case 84:
		case 164:
		case 148:
		case 228:
		case 212:
			block = "DB";
			break;
		case 37:
		case 21:
		case 101:
		case 85:
		case 165:
		case 149:
		case 229:
		case 213:
			block = "DB_HTILE";
			break;
		case 39:
		case 23:
		case 103:
		case 87:
		case 167:
		case 151:
		case 231:
		case 215:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 8:
		case 4:
		case 136:
		case 132:
		case 200:
		case 196:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	}

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
5038 
/*
 * si_vm_flush - flush the TLB for the given VM via the ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: VM to flush (no-op when NULL)
 *
 * Emits WRITE_DATA packets that update the VM's page-directory base,
 * flush the HDP cache and invalidate the VM context's TLB, then syncs
 * PFP to ME.  The packet order is significant.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
5081 
5082 /*
5083  *  Power and clock gating
5084  */
5085 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5086 {
5087 	int i;
5088 
5089 	for (i = 0; i < rdev->usec_timeout; i++) {
5090 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5091 			break;
5092 		udelay(1);
5093 	}
5094 
5095 	for (i = 0; i < rdev->usec_timeout; i++) {
5096 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5097 			break;
5098 		udelay(1);
5099 	}
5100 }
5101 
5102 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5103 					 bool enable)
5104 {
5105 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5106 	u32 mask;
5107 	int i;
5108 
5109 	if (enable)
5110 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5111 	else
5112 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5113 	WREG32(CP_INT_CNTL_RING0, tmp);
5114 
5115 	if (!enable) {
5116 		/* read a gfx register */
5117 		tmp = RREG32(DB_DEPTH_INFO);
5118 
5119 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5120 		for (i = 0; i < rdev->usec_timeout; i++) {
5121 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5122 				break;
5123 			udelay(1);
5124 		}
5125 	}
5126 }
5127 
5128 static void si_set_uvd_dcm(struct radeon_device *rdev,
5129 			   bool sw_mode)
5130 {
5131 	u32 tmp, tmp2;
5132 
5133 	tmp = RREG32(UVD_CGC_CTRL);
5134 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5135 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5136 
5137 	if (sw_mode) {
5138 		tmp &= ~0x7ffff800;
5139 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5140 	} else {
5141 		tmp |= 0x7ffff800;
5142 		tmp2 = 0;
5143 	}
5144 
5145 	WREG32(UVD_CGC_CTRL, tmp);
5146 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5147 }
5148 
5149 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5150 {
5151 	bool hw_mode = true;
5152 
5153 	if (hw_mode) {
5154 		si_set_uvd_dcm(rdev, false);
5155 	} else {
5156 		u32 tmp = RREG32(UVD_CGC_CTRL);
5157 		tmp &= ~DCM;
5158 		WREG32(UVD_CGC_CTRL, tmp);
5159 	}
5160 }
5161 
5162 static u32 si_halt_rlc(struct radeon_device *rdev)
5163 {
5164 	u32 data, orig;
5165 
5166 	orig = data = RREG32(RLC_CNTL);
5167 
5168 	if (data & RLC_ENABLE) {
5169 		data &= ~RLC_ENABLE;
5170 		WREG32(RLC_CNTL, data);
5171 
5172 		si_wait_for_rlc_serdes(rdev);
5173 	}
5174 
5175 	return orig;
5176 }
5177 
5178 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5179 {
5180 	u32 tmp;
5181 
5182 	tmp = RREG32(RLC_CNTL);
5183 	if (tmp != rlc)
5184 		WREG32(RLC_CNTL, rlc);
5185 }
5186 
5187 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5188 {
5189 	u32 data, orig;
5190 
5191 	orig = data = RREG32(DMA_PG);
5192 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5193 		data |= PG_CNTL_ENABLE;
5194 	else
5195 		data &= ~PG_CNTL_ENABLE;
5196 	if (orig != data)
5197 		WREG32(DMA_PG, data);
5198 }
5199 
5200 static void si_init_dma_pg(struct radeon_device *rdev)
5201 {
5202 	u32 tmp;
5203 
5204 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5205 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5206 
5207 	for (tmp = 0; tmp < 5; tmp++)
5208 		WREG32(DMA_PGFSM_WRITE, 0);
5209 }
5210 
5211 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5212 			       bool enable)
5213 {
5214 	u32 tmp;
5215 
5216 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5217 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5218 		WREG32(RLC_TTOP_D, tmp);
5219 
5220 		tmp = RREG32(RLC_PG_CNTL);
5221 		tmp |= GFX_PG_ENABLE;
5222 		WREG32(RLC_PG_CNTL, tmp);
5223 
5224 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5225 		tmp |= AUTO_PG_EN;
5226 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5227 	} else {
5228 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5229 		tmp &= ~AUTO_PG_EN;
5230 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5231 
5232 		tmp = RREG32(DB_RENDER_CONTROL);
5233 	}
5234 }
5235 
5236 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5237 {
5238 	u32 tmp;
5239 
5240 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5241 
5242 	tmp = RREG32(RLC_PG_CNTL);
5243 	tmp |= GFX_PG_SRC;
5244 	WREG32(RLC_PG_CNTL, tmp);
5245 
5246 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5247 
5248 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5249 
5250 	tmp &= ~GRBM_REG_SGIT_MASK;
5251 	tmp |= GRBM_REG_SGIT(0x700);
5252 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5253 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5254 }
5255 
5256 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5257 {
5258 	u32 mask = 0, tmp, tmp1;
5259 	int i;
5260 
5261 	si_select_se_sh(rdev, se, sh);
5262 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5263 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5264 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5265 
5266 	tmp &= 0xffff0000;
5267 
5268 	tmp |= tmp1;
5269 	tmp >>= 16;
5270 
5271 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5272 		mask <<= 1;
5273 		mask |= 1;
5274 	}
5275 
5276 	return (~tmp) & mask;
5277 }
5278 
5279 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5280 {
5281 	u32 i, j, k, active_cu_number = 0;
5282 	u32 mask, counter, cu_bitmap;
5283 	u32 tmp = 0;
5284 
5285 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5286 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5287 			mask = 1;
5288 			cu_bitmap = 0;
5289 			counter  = 0;
5290 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5291 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5292 					if (counter < 2)
5293 						cu_bitmap |= mask;
5294 					counter++;
5295 				}
5296 				mask <<= 1;
5297 			}
5298 
5299 			active_cu_number += counter;
5300 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5301 		}
5302 	}
5303 
5304 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5305 
5306 	tmp = RREG32(RLC_MAX_PG_CU);
5307 	tmp &= ~MAX_PU_CU_MASK;
5308 	tmp |= MAX_PU_CU(active_cu_number);
5309 	WREG32(RLC_MAX_PG_CU, tmp);
5310 }
5311 
/*
 * si_enable_cgcg - toggle coarse-grain clock gating for GFX
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable (only honored with GFX_CGCG support)
 *
 * The enable path halts the RLC, broadcasts a serdes write, waits for
 * the serdes masters to go idle and restores the RLC before setting
 * CGCG_EN/CGLS_EN; the sequence order is significant.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* halt the RLC around the serdes broadcast */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		/* restore the saved RLC_CNTL value */
		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads; values are intentionally discarded */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5351 
/*
 * si_enable_mgcg - toggle medium-grain clock gating for GFX
 *
 * @rdev: radeon_device pointer
 * @enable: true to enable (only honored with GFX_MGCG support)
 *
 * Both paths halt the RLC, broadcast a serdes write with a
 * path-specific control value, and restore the RLC; the access order
 * is significant.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;	/* magic value from the original code */
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* optionally enable CP memory light sleep */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the low override bits to allow gating */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* set the override bits to block gating */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off CP memory light sleep if it was on */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5407 
5408 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5409 			       bool enable)
5410 {
5411 	u32 orig, data, tmp;
5412 
5413 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5414 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5415 		tmp |= 0x3fff;
5416 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5417 
5418 		orig = data = RREG32(UVD_CGC_CTRL);
5419 		data |= DCM;
5420 		if (orig != data)
5421 			WREG32(UVD_CGC_CTRL, data);
5422 
5423 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5424 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5425 	} else {
5426 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5427 		tmp &= ~0x3fff;
5428 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5429 
5430 		orig = data = RREG32(UVD_CGC_CTRL);
5431 		data &= ~DCM;
5432 		if (orig != data)
5433 			WREG32(UVD_CGC_CTRL, data);
5434 
5435 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5436 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5437 	}
5438 }
5439 
/* Memory-controller/VM clock-gating control registers.  si_enable_mc_ls()
 * and si_enable_mc_mgcg() iterate this table, toggling the MC_LS_ENABLE /
 * MC_CG_ENABLE bits in each register. */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5452 
5453 static void si_enable_mc_ls(struct radeon_device *rdev,
5454 			    bool enable)
5455 {
5456 	int i;
5457 	u32 orig, data;
5458 
5459 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5460 		orig = data = RREG32(mc_cg_registers[i]);
5461 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5462 			data |= MC_LS_ENABLE;
5463 		else
5464 			data &= ~MC_LS_ENABLE;
5465 		if (data != orig)
5466 			WREG32(mc_cg_registers[i], data);
5467 	}
5468 }
5469 
5470 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5471 			       bool enable)
5472 {
5473 	int i;
5474 	u32 orig, data;
5475 
5476 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5477 		orig = data = RREG32(mc_cg_registers[i]);
5478 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5479 			data |= MC_CG_ENABLE;
5480 		else
5481 			data &= ~MC_CG_ENABLE;
5482 		if (data != orig)
5483 			WREG32(mc_cg_registers[i], data);
5484 	}
5485 }
5486 
5487 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5488 			       bool enable)
5489 {
5490 	u32 orig, data, offset;
5491 	int i;
5492 
5493 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5494 		for (i = 0; i < 2; i++) {
5495 			if (i == 0)
5496 				offset = DMA0_REGISTER_OFFSET;
5497 			else
5498 				offset = DMA1_REGISTER_OFFSET;
5499 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5500 			data &= ~MEM_POWER_OVERRIDE;
5501 			if (data != orig)
5502 				WREG32(DMA_POWER_CNTL + offset, data);
5503 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5504 		}
5505 	} else {
5506 		for (i = 0; i < 2; i++) {
5507 			if (i == 0)
5508 				offset = DMA0_REGISTER_OFFSET;
5509 			else
5510 				offset = DMA1_REGISTER_OFFSET;
5511 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5512 			data |= MEM_POWER_OVERRIDE;
5513 			if (data != orig)
5514 				WREG32(DMA_POWER_CNTL + offset, data);
5515 
5516 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5517 			data = 0xff000000;
5518 			if (data != orig)
5519 				WREG32(DMA_CLK_CTRL + offset, data);
5520 		}
5521 	}
5522 }
5523 
5524 static void si_enable_bif_mgls(struct radeon_device *rdev,
5525 			       bool enable)
5526 {
5527 	u32 orig, data;
5528 
5529 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5530 
5531 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5532 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5533 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5534 	else
5535 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5536 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5537 
5538 	if (orig != data)
5539 		WREG32_PCIE(PCIE_CNTL2, data);
5540 }
5541 
5542 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5543 			       bool enable)
5544 {
5545 	u32 orig, data;
5546 
5547 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5548 
5549 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5550 		data &= ~CLOCK_GATING_DIS;
5551 	else
5552 		data |= CLOCK_GATING_DIS;
5553 
5554 	if (orig != data)
5555 		WREG32(HDP_HOST_PATH_CNTL, data);
5556 }
5557 
5558 static void si_enable_hdp_ls(struct radeon_device *rdev,
5559 			     bool enable)
5560 {
5561 	u32 orig, data;
5562 
5563 	orig = data = RREG32(HDP_MEM_POWER_LS);
5564 
5565 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5566 		data |= HDP_LS_ENABLE;
5567 	else
5568 		data &= ~HDP_LS_ENABLE;
5569 
5570 	if (orig != data)
5571 		WREG32(HDP_MEM_POWER_LS, data);
5572 }
5573 
5574 static void si_update_cg(struct radeon_device *rdev,
5575 			 u32 block, bool enable)
5576 {
5577 	if (block & RADEON_CG_BLOCK_GFX) {
5578 		si_enable_gui_idle_interrupt(rdev, false);
5579 		/* order matters! */
5580 		if (enable) {
5581 			si_enable_mgcg(rdev, true);
5582 			si_enable_cgcg(rdev, true);
5583 		} else {
5584 			si_enable_cgcg(rdev, false);
5585 			si_enable_mgcg(rdev, false);
5586 		}
5587 		si_enable_gui_idle_interrupt(rdev, true);
5588 	}
5589 
5590 	if (block & RADEON_CG_BLOCK_MC) {
5591 		si_enable_mc_mgcg(rdev, enable);
5592 		si_enable_mc_ls(rdev, enable);
5593 	}
5594 
5595 	if (block & RADEON_CG_BLOCK_SDMA) {
5596 		si_enable_dma_mgcg(rdev, enable);
5597 	}
5598 
5599 	if (block & RADEON_CG_BLOCK_BIF) {
5600 		si_enable_bif_mgls(rdev, enable);
5601 	}
5602 
5603 	if (block & RADEON_CG_BLOCK_UVD) {
5604 		if (rdev->has_uvd) {
5605 			si_enable_uvd_mgcg(rdev, enable);
5606 		}
5607 	}
5608 
5609 	if (block & RADEON_CG_BLOCK_HDP) {
5610 		si_enable_hdp_mgcg(rdev, enable);
5611 		si_enable_hdp_ls(rdev, enable);
5612 	}
5613 }
5614 
5615 static void si_init_cg(struct radeon_device *rdev)
5616 {
5617 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5618 			    RADEON_CG_BLOCK_MC |
5619 			    RADEON_CG_BLOCK_SDMA |
5620 			    RADEON_CG_BLOCK_BIF |
5621 			    RADEON_CG_BLOCK_HDP), true);
5622 	if (rdev->has_uvd) {
5623 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5624 		si_init_uvd_internal_cg(rdev);
5625 	}
5626 }
5627 
5628 static void si_fini_cg(struct radeon_device *rdev)
5629 {
5630 	if (rdev->has_uvd) {
5631 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5632 	}
5633 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5634 			    RADEON_CG_BLOCK_MC |
5635 			    RADEON_CG_BLOCK_SDMA |
5636 			    RADEON_CG_BLOCK_BIF |
5637 			    RADEON_CG_BLOCK_HDP), false);
5638 }
5639 
5640 u32 si_get_csb_size(struct radeon_device *rdev)
5641 {
5642 	u32 count = 0;
5643 	const struct cs_section_def *sect = NULL;
5644 	const struct cs_extent_def *ext = NULL;
5645 
5646 	if (rdev->rlc.cs_data == NULL)
5647 		return 0;
5648 
5649 	/* begin clear state */
5650 	count += 2;
5651 	/* context control state */
5652 	count += 3;
5653 
5654 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5655 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5656 			if (sect->id == SECT_CONTEXT)
5657 				count += 2 + ext->reg_count;
5658 			else
5659 				return 0;
5660 		}
5661 	}
5662 	/* pa_sc_raster_config */
5663 	count += 3;
5664 	/* end clear state */
5665 	count += 2;
5666 	/* clear state */
5667 	count += 2;
5668 
5669 	return count;
5670 }
5671 
5672 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5673 {
5674 	u32 count = 0, i;
5675 	const struct cs_section_def *sect = NULL;
5676 	const struct cs_extent_def *ext = NULL;
5677 
5678 	if (rdev->rlc.cs_data == NULL)
5679 		return;
5680 	if (buffer == NULL)
5681 		return;
5682 
5683 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5684 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5685 
5686 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5687 	buffer[count++] = cpu_to_le32(0x80000000);
5688 	buffer[count++] = cpu_to_le32(0x80000000);
5689 
5690 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5691 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5692 			if (sect->id == SECT_CONTEXT) {
5693 				buffer[count++] =
5694 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5695 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5696 				for (i = 0; i < ext->reg_count; i++)
5697 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5698 			} else {
5699 				return;
5700 			}
5701 		}
5702 	}
5703 
5704 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5705 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5706 	switch (rdev->family) {
5707 	case CHIP_TAHITI:
5708 	case CHIP_PITCAIRN:
5709 		buffer[count++] = cpu_to_le32(0x2a00126a);
5710 		break;
5711 	case CHIP_VERDE:
5712 		buffer[count++] = cpu_to_le32(0x0000124a);
5713 		break;
5714 	case CHIP_OLAND:
5715 		buffer[count++] = cpu_to_le32(0x00000082);
5716 		break;
5717 	case CHIP_HAINAN:
5718 		buffer[count++] = cpu_to_le32(0x00000000);
5719 		break;
5720 	default:
5721 		buffer[count++] = cpu_to_le32(0x00000000);
5722 		break;
5723 	}
5724 
5725 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5726 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5727 
5728 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5729 	buffer[count++] = cpu_to_le32(0);
5730 }
5731 
5732 static void si_init_pg(struct radeon_device *rdev)
5733 {
5734 	if (rdev->pg_flags) {
5735 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5736 			si_init_dma_pg(rdev);
5737 		}
5738 		si_init_ao_cu_mask(rdev);
5739 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5740 			si_init_gfx_cgpg(rdev);
5741 		} else {
5742 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5743 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5744 		}
5745 		si_enable_dma_pg(rdev, true);
5746 		si_enable_gfx_cgpg(rdev, true);
5747 	} else {
5748 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5749 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5750 	}
5751 }
5752 
5753 static void si_fini_pg(struct radeon_device *rdev)
5754 {
5755 	if (rdev->pg_flags) {
5756 		si_enable_dma_pg(rdev, false);
5757 		si_enable_gfx_cgpg(rdev, false);
5758 	}
5759 }
5760 
5761 /*
5762  * RLC
5763  */
5764 void si_rlc_reset(struct radeon_device *rdev)
5765 {
5766 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5767 
5768 	tmp |= SOFT_RESET_RLC;
5769 	WREG32(GRBM_SOFT_RESET, tmp);
5770 	udelay(50);
5771 	tmp &= ~SOFT_RESET_RLC;
5772 	WREG32(GRBM_SOFT_RESET, tmp);
5773 	udelay(50);
5774 }
5775 
/* Halt the RLC (run list controller) microengine. */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	/* GUI idle interrupts are only useful while the RLC is running */
	si_enable_gui_idle_interrupt(rdev, false);

	/* wait for outstanding RLC serdes transactions to drain */
	si_wait_for_rlc_serdes(rdev);
}
5784 
/* Start the RLC microengine and re-enable GUI idle interrupts. */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	/* give the RLC some time to come up */
	udelay(50);
}
5793 
5794 static bool si_lbpw_supported(struct radeon_device *rdev)
5795 {
5796 	u32 tmp;
5797 
5798 	/* Enable LBPW only for DDR3 */
5799 	tmp = RREG32(MC_SEQ_MISC0);
5800 	if ((tmp & 0xF0000000) == 0xB0000000)
5801 		return true;
5802 	return false;
5803 }
5804 
5805 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5806 {
5807 	u32 tmp;
5808 
5809 	tmp = RREG32(RLC_LB_CNTL);
5810 	if (enable)
5811 		tmp |= LOAD_BALANCE_ENABLE;
5812 	else
5813 		tmp &= ~LOAD_BALANCE_ENABLE;
5814 	WREG32(RLC_LB_CNTL, tmp);
5815 
5816 	if (!enable) {
5817 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5818 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5819 	}
5820 }
5821 
/* Bring up the RLC: stop and reset it, initialize PG/CG state,
 * load the RLC microcode, and start the engine.
 * Returns 0 on success, -EINVAL if no RLC firmware was loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* reset run list / load-balancing state before loading ucode */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware images carry a header; words are little-endian */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		const __le32 *fw_data = (const __le32 *)
			((const char *)rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		for (i = 0; i < fw_size; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
		}
	} else {
		/* legacy firmware images are raw big-endian words of fixed size */
		const __be32 *fw_data =
			(const __be32 *)rdev->rlc_fw->data;
		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
			WREG32(RLC_UCODE_ADDR, i);
			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
		}
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5876 
/* Enable the interrupt handler (IH): turn on the controller and its
 * ring buffer, then record the enabled state in the driver.
 */
static void si_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;
}
5888 
/* Disable the interrupt handler: stop the IH ring buffer and the
 * controller, reset the ring pointers, and clear the driver state.
 */
static void si_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}
5904 
/* Mask every interrupt source in hardware: CP rings, DMA engines,
 * GRBM, per-CRTC vblank and pageflip, and (on parts with a display
 * engine) the HPD pins.  Polarity bits in the HPD control registers
 * are preserved.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* keep only the context busy/empty enables on ring 0 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* vblank/vline interrupt masks, per populated CRTC pair */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip interrupt masks */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

		/* mask HPD interrupts but keep the polarity bits */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5962 
/* One-time interrupt bring-up: allocate the IH ring, load the RLC,
 * program the IH ring buffer and controller, then enable interrupts
 * with all sources masked (si_irq_set() unmasks them later).
 * Returns 0 on success or a negative errno.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_enable_busmaster(rdev->dev->bsddev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
6033 
/* Program the hardware interrupt enables to match the driver's
 * current interrupt state (rdev->irq): CP rings, DMA engines,
 * per-CRTC vblank/pageflip, HPD pins, and the thermal interrupt.
 * Returns 0 on success, -EINVAL if no IRQ handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* start from the current register contents with the enables cleared */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank: enabled for either a waiter or a pending pageflip */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* write the accumulated enables back to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pageflip interrupts are always left enabled */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	return 0;
}
6210 
6211 static inline void si_irq_ack(struct radeon_device *rdev)
6212 {
6213 	u32 tmp;
6214 
6215 	if (ASIC_IS_NODCE(rdev))
6216 		return;
6217 
6218 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6219 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6220 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6221 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6222 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6223 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6224 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6225 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6226 	if (rdev->num_crtc >= 4) {
6227 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6228 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6229 	}
6230 	if (rdev->num_crtc >= 6) {
6231 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6232 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6233 	}
6234 
6235 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6236 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6237 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6238 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6239 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6240 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6241 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6242 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6243 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6244 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6245 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6246 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6247 
6248 	if (rdev->num_crtc >= 4) {
6249 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6250 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6251 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6252 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6253 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6254 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6255 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6256 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6257 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6258 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6259 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6260 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6261 	}
6262 
6263 	if (rdev->num_crtc >= 6) {
6264 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6265 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6266 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6267 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6268 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6269 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6270 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6271 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6272 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6273 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6274 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6275 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6276 	}
6277 
6278 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6279 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6280 		tmp |= DC_HPDx_INT_ACK;
6281 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6282 	}
6283 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6284 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6285 		tmp |= DC_HPDx_INT_ACK;
6286 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6287 	}
6288 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6289 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6290 		tmp |= DC_HPDx_INT_ACK;
6291 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6292 	}
6293 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6294 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6295 		tmp |= DC_HPDx_INT_ACK;
6296 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6297 	}
6298 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6299 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6300 		tmp |= DC_HPDx_INT_ACK;
6301 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6302 	}
6303 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6304 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6305 		tmp |= DC_HPDx_INT_ACK;
6306 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6307 	}
6308 }
6309 
/* Fully quiesce interrupts: disable the IH, ack anything still
 * pending, and mask all interrupt sources in hardware.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6318 
/* Suspend-time interrupt teardown: disable irqs and stop the RLC. */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6324 
/* Final interrupt teardown: suspend irq handling and free the IH ring. */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6330 
/* Fetch the current IH ring write pointer (from the writeback slot when
 * enabled, otherwise from the register), handling ring overflow by
 * skipping ahead to the oldest non-overwritten entry.
 * Returns the write pointer masked to the ring size.
 */
static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catchup.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
6355 
6356 /*        SI IV Ring
6357  * Each IV ring entry is 128 bits:
6358  * [7:0]    - interrupt source id
6359  * [31:8]   - reserved
6360  * [59:32]  - interrupt source data
6361  * [63:60]  - reserved
6362  * [71:64]  - RINGID
6363  * [79:72]  - VMID
6364  * [127:80] - reserved
6365  */
6366 irqreturn_t si_irq_process(struct radeon_device *rdev)
6367 {
6368 	u32 wptr;
6369 	u32 rptr;
6370 	u32 src_id, src_data, ring_id;
6371 	u32 ring_index;
6372 	bool queue_hotplug = false;
6373 	bool queue_thermal = false;
6374 	u32 status, addr;
6375 
6376 	if (!rdev->ih.enabled || rdev->shutdown)
6377 		return IRQ_NONE;
6378 
6379 	wptr = si_get_ih_wptr(rdev);
6380 
6381 restart_ih:
6382 	/* is somebody else already processing irqs? */
6383 	if (atomic_xchg(&rdev->ih.lock, 1))
6384 		return IRQ_NONE;
6385 
6386 	rptr = rdev->ih.rptr;
6387 	DRM_DEBUG_VBLANK("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6388 
6389 	/* Order reading of wptr vs. reading of IH ring data */
6390 	rmb();
6391 
6392 	/* display interrupts */
6393 	si_irq_ack(rdev);
6394 
6395 	while (rptr != wptr) {
6396 		/* wptr/rptr are in bytes! */
6397 		ring_index = rptr / 4;
6398 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6399 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6400 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6401 
6402 		switch (src_id) {
6403 		case 1: /* D1 vblank/vline */
6404 			switch (src_data) {
6405 			case 0: /* D1 vblank */
6406 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6407 					if (rdev->irq.crtc_vblank_int[0]) {
6408 						drm_handle_vblank(rdev->ddev, 0);
6409 						rdev->pm.vblank_sync = true;
6410 						wake_up(&rdev->irq.vblank_queue);
6411 					}
6412 					if (atomic_read(&rdev->irq.pflip[0]))
6413 						radeon_crtc_handle_vblank(rdev, 0);
6414 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6415 					DRM_DEBUG_VBLANK("IH: D1 vblank\n");
6416 				}
6417 				break;
6418 			case 1: /* D1 vline */
6419 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6420 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6421 					DRM_DEBUG_VBLANK("IH: D1 vline\n");
6422 				}
6423 				break;
6424 			default:
6425 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6426 				break;
6427 			}
6428 			break;
6429 		case 2: /* D2 vblank/vline */
6430 			switch (src_data) {
6431 			case 0: /* D2 vblank */
6432 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6433 					if (rdev->irq.crtc_vblank_int[1]) {
6434 						drm_handle_vblank(rdev->ddev, 1);
6435 						rdev->pm.vblank_sync = true;
6436 						wake_up(&rdev->irq.vblank_queue);
6437 					}
6438 					if (atomic_read(&rdev->irq.pflip[1]))
6439 						radeon_crtc_handle_vblank(rdev, 1);
6440 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6441 					DRM_DEBUG_VBLANK("IH: D2 vblank\n");
6442 				}
6443 				break;
6444 			case 1: /* D2 vline */
6445 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6446 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6447 					DRM_DEBUG_VBLANK("IH: D2 vline\n");
6448 				}
6449 				break;
6450 			default:
6451 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6452 				break;
6453 			}
6454 			break;
6455 		case 3: /* D3 vblank/vline */
6456 			switch (src_data) {
6457 			case 0: /* D3 vblank */
6458 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6459 					if (rdev->irq.crtc_vblank_int[2]) {
6460 						drm_handle_vblank(rdev->ddev, 2);
6461 						rdev->pm.vblank_sync = true;
6462 						wake_up(&rdev->irq.vblank_queue);
6463 					}
6464 					if (atomic_read(&rdev->irq.pflip[2]))
6465 						radeon_crtc_handle_vblank(rdev, 2);
6466 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6467 					DRM_DEBUG_VBLANK("IH: D3 vblank\n");
6468 				}
6469 				break;
6470 			case 1: /* D3 vline */
6471 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6472 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6473 					DRM_DEBUG_VBLANK("IH: D3 vline\n");
6474 				}
6475 				break;
6476 			default:
6477 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6478 				break;
6479 			}
6480 			break;
6481 		case 4: /* D4 vblank/vline */
6482 			switch (src_data) {
6483 			case 0: /* D4 vblank */
6484 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6485 					if (rdev->irq.crtc_vblank_int[3]) {
6486 						drm_handle_vblank(rdev->ddev, 3);
6487 						rdev->pm.vblank_sync = true;
6488 						wake_up(&rdev->irq.vblank_queue);
6489 					}
6490 					if (atomic_read(&rdev->irq.pflip[3]))
6491 						radeon_crtc_handle_vblank(rdev, 3);
6492 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6493 					DRM_DEBUG_VBLANK("IH: D4 vblank\n");
6494 				}
6495 				break;
6496 			case 1: /* D4 vline */
6497 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6498 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6499 					DRM_DEBUG_VBLANK("IH: D4 vline\n");
6500 				}
6501 				break;
6502 			default:
6503 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6504 				break;
6505 			}
6506 			break;
6507 		case 5: /* D5 vblank/vline */
6508 			switch (src_data) {
6509 			case 0: /* D5 vblank */
6510 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6511 					if (rdev->irq.crtc_vblank_int[4]) {
6512 						drm_handle_vblank(rdev->ddev, 4);
6513 						rdev->pm.vblank_sync = true;
6514 						wake_up(&rdev->irq.vblank_queue);
6515 					}
6516 					if (atomic_read(&rdev->irq.pflip[4]))
6517 						radeon_crtc_handle_vblank(rdev, 4);
6518 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6519 					DRM_DEBUG_VBLANK("IH: D5 vblank\n");
6520 				}
6521 				break;
6522 			case 1: /* D5 vline */
6523 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6524 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6525 					DRM_DEBUG_VBLANK("IH: D5 vline\n");
6526 				}
6527 				break;
6528 			default:
6529 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6530 				break;
6531 			}
6532 			break;
6533 		case 6: /* D6 vblank/vline */
6534 			switch (src_data) {
6535 			case 0: /* D6 vblank */
6536 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6537 					if (rdev->irq.crtc_vblank_int[5]) {
6538 						drm_handle_vblank(rdev->ddev, 5);
6539 						rdev->pm.vblank_sync = true;
6540 						wake_up(&rdev->irq.vblank_queue);
6541 					}
6542 					if (atomic_read(&rdev->irq.pflip[5]))
6543 						radeon_crtc_handle_vblank(rdev, 5);
6544 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6545 					DRM_DEBUG_VBLANK("IH: D6 vblank\n");
6546 				}
6547 				break;
6548 			case 1: /* D6 vline */
6549 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6550 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6551 					DRM_DEBUG_VBLANK("IH: D6 vline\n");
6552 				}
6553 				break;
6554 			default:
6555 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6556 				break;
6557 			}
6558 			break;
6559 		case 8: /* D1 page flip */
6560 		case 10: /* D2 page flip */
6561 		case 12: /* D3 page flip */
6562 		case 14: /* D4 page flip */
6563 		case 16: /* D5 page flip */
6564 		case 18: /* D6 page flip */
6565 			DRM_DEBUG_VBLANK("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6566 			if (radeon_use_pflipirq > 0)
6567 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6568 			break;
6569 		case 42: /* HPD hotplug */
6570 			switch (src_data) {
6571 			case 0:
6572 				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6573 					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6574 					queue_hotplug = true;
6575 					DRM_DEBUG("IH: HPD1\n");
6576 				}
6577 				break;
6578 			case 1:
6579 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6580 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6581 					queue_hotplug = true;
6582 					DRM_DEBUG("IH: HPD2\n");
6583 				}
6584 				break;
6585 			case 2:
6586 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6587 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6588 					queue_hotplug = true;
6589 					DRM_DEBUG("IH: HPD3\n");
6590 				}
6591 				break;
6592 			case 3:
6593 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6594 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6595 					queue_hotplug = true;
6596 					DRM_DEBUG("IH: HPD4\n");
6597 				}
6598 				break;
6599 			case 4:
6600 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6601 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6602 					queue_hotplug = true;
6603 					DRM_DEBUG("IH: HPD5\n");
6604 				}
6605 				break;
6606 			case 5:
6607 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6608 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6609 					queue_hotplug = true;
6610 					DRM_DEBUG("IH: HPD6\n");
6611 				}
6612 				break;
6613 			default:
6614 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6615 				break;
6616 			}
6617 			break;
6618 		case 124: /* UVD */
6619 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6620 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6621 			break;
6622 		case 146:
6623 		case 147:
6624 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6625 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6626 			/* reset addr and status */
6627 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6628 			if (addr == 0x0 && status == 0x0)
6629 				break;
6630 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6631 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6632 				addr);
6633 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6634 				status);
6635 			si_vm_decode_fault(rdev, status, addr);
6636 			break;
6637 		case 176: /* RINGID0 CP_INT */
6638 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6639 			break;
6640 		case 177: /* RINGID1 CP_INT */
6641 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6642 			break;
6643 		case 178: /* RINGID2 CP_INT */
6644 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6645 			break;
6646 		case 181: /* CP EOP event */
6647 			DRM_DEBUG("IH: CP EOP\n");
6648 			switch (ring_id) {
6649 			case 0:
6650 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6651 				break;
6652 			case 1:
6653 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6654 				break;
6655 			case 2:
6656 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6657 				break;
6658 			}
6659 			break;
6660 		case 224: /* DMA trap event */
6661 			DRM_DEBUG("IH: DMA trap\n");
6662 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6663 			break;
6664 		case 230: /* thermal low to high */
6665 			DRM_DEBUG("IH: thermal low to high\n");
6666 			rdev->pm.dpm.thermal.high_to_low = false;
6667 			queue_thermal = true;
6668 			break;
6669 		case 231: /* thermal high to low */
6670 			DRM_DEBUG("IH: thermal high to low\n");
6671 			rdev->pm.dpm.thermal.high_to_low = true;
6672 			queue_thermal = true;
6673 			break;
6674 		case 233: /* GUI IDLE */
6675 			DRM_DEBUG("IH: GUI idle\n");
6676 			break;
6677 		case 244: /* DMA trap event */
6678 			DRM_DEBUG("IH: DMA1 trap\n");
6679 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6680 			break;
6681 		default:
6682 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6683 			break;
6684 		}
6685 
6686 		/* wptr/rptr are in bytes! */
6687 		rptr += 16;
6688 		rptr &= rdev->ih.ptr_mask;
6689 		WREG32(IH_RB_RPTR, rptr);
6690 	}
6691 	if (queue_hotplug)
6692 		taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
6693 	if (queue_thermal && rdev->pm.dpm_enabled)
6694 		taskqueue_enqueue(rdev->tq, &rdev->pm.dpm.thermal.work);
6695 	rdev->ih.rptr = rptr;
6696 	atomic_set(&rdev->ih.lock, 0);
6697 
6698 	/* make sure wptr hasn't changed while processing */
6699 	wptr = si_get_ih_wptr(rdev);
6700 	if (wptr != rptr)
6701 		goto restart_ih;
6702 
6703 	return IRQ_HANDLED;
6704 }
6705 
6706 /*
6707  * startup/shutdown callbacks
6708  */
/**
 * si_startup - bring up the SI GPU engines
 *
 * @rdev: radeon_device pointer
 *
 * Common hardware start path used by both si_init() and si_resume().
 * Programs the memory controller, loads microcode, initializes the RLC,
 * writeback and fence infrastructure, enables interrupts and starts the
 * GFX ring, both compute CP rings, both DMA rings and (optionally) UVD,
 * then sets up the IB pool, VM manager and audio.
 * The ordering of these steps follows the hardware bring-up sequence
 * and must not be changed.
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* when DPM is enabled the MC microcode was already loaded by
	 * the DPM code; don't load it twice */
	if (!rdev->pm.dpm_enabled) {
		r = si_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start fence handling on every ring before the rings themselves */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failure is not fatal: just disable the UVD ring and go on */
	if (rdev->has_uvd) {
		r = uvd_v2_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means UVD was disabled above; skip it */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
					     RADEON_CP_PACKET2);
			if (!r)
				r = uvd_v1_0_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6885 
6886 int si_resume(struct radeon_device *rdev)
6887 {
6888 	int r;
6889 
6890 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6891 	 * posting will perform necessary task to bring back GPU into good
6892 	 * shape.
6893 	 */
6894 	/* post card */
6895 	atom_asic_init(rdev->mode_info.atom_context);
6896 
6897 	/* init golden registers */
6898 	si_init_golden_registers(rdev);
6899 
6900 	if (rdev->pm.pm_method == PM_METHOD_DPM)
6901 		radeon_pm_resume(rdev);
6902 
6903 	rdev->accel_working = true;
6904 	r = si_startup(rdev);
6905 	if (r) {
6906 		DRM_ERROR("si startup failed on resume\n");
6907 		rdev->accel_working = false;
6908 		return r;
6909 	}
6910 
6911 	return r;
6912 
6913 }
6914 
/**
 * si_suspend - quiesce the asic before suspend
 *
 * @rdev: radeon_device pointer
 *
 * Stops power management, audio, the VM manager, the CP and DMA
 * engines and UVD, then tears down PG/CG, suspends the interrupt
 * handler, disables writeback and shuts down the GART.
 * The teardown order mirrors the reverse of si_startup().
 * Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6933 
6934 /* Plan is to move initialization in that function and use
6935  * helper function so that radeon_device_init pretty much
6936  * do nothing more than calling asic specific function. This
6937  * should also allow to remove a bunch of callback function
6938  * like vram_info.
6939  */
6940 int si_init(struct radeon_device *rdev)
6941 {
6942 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6943 	int r;
6944 
6945 	/* Read BIOS */
6946 	if (!radeon_get_bios(rdev)) {
6947 		if (ASIC_IS_AVIVO(rdev))
6948 			return -EINVAL;
6949 	}
6950 	/* Must be an ATOMBIOS */
6951 	if (!rdev->is_atom_bios) {
6952 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6953 		return -EINVAL;
6954 	}
6955 	r = radeon_atombios_init(rdev);
6956 	if (r)
6957 		return r;
6958 
6959 	/* Post card if necessary */
6960 	if (!radeon_card_posted(rdev)) {
6961 		if (!rdev->bios) {
6962 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6963 			return -EINVAL;
6964 		}
6965 		DRM_INFO("GPU not posted. posting now...\n");
6966 		atom_asic_init(rdev->mode_info.atom_context);
6967 	}
6968 	/* init golden registers */
6969 	si_init_golden_registers(rdev);
6970 	/* Initialize scratch registers */
6971 	si_scratch_init(rdev);
6972 	/* Initialize surface registers */
6973 	radeon_surface_init(rdev);
6974 	/* Initialize clocks */
6975 	radeon_get_clock_info(rdev->ddev);
6976 
6977 	/* Fence driver */
6978 	r = radeon_fence_driver_init(rdev);
6979 	if (r)
6980 		return r;
6981 
6982 	/* initialize memory controller */
6983 	r = si_mc_init(rdev);
6984 	if (r)
6985 		return r;
6986 	/* Memory manager */
6987 	r = radeon_bo_init(rdev);
6988 	if (r)
6989 		return r;
6990 
6991 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6992 	    !rdev->rlc_fw || !rdev->mc_fw) {
6993 		r = si_init_microcode(rdev);
6994 		if (r) {
6995 			DRM_ERROR("Failed to load firmware!\n");
6996 			return r;
6997 		}
6998 	}
6999 
7000 	/* Initialize power management */
7001 	radeon_pm_init(rdev);
7002 
7003 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7004 	ring->ring_obj = NULL;
7005 	r600_ring_init(rdev, ring, 1024 * 1024);
7006 
7007 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7008 	ring->ring_obj = NULL;
7009 	r600_ring_init(rdev, ring, 1024 * 1024);
7010 
7011 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7012 	ring->ring_obj = NULL;
7013 	r600_ring_init(rdev, ring, 1024 * 1024);
7014 
7015 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7016 	ring->ring_obj = NULL;
7017 	r600_ring_init(rdev, ring, 64 * 1024);
7018 
7019 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7020 	ring->ring_obj = NULL;
7021 	r600_ring_init(rdev, ring, 64 * 1024);
7022 
7023 	if (rdev->has_uvd) {
7024 		r = radeon_uvd_init(rdev);
7025 		if (!r) {
7026 			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7027 			ring->ring_obj = NULL;
7028 			r600_ring_init(rdev, ring, 4096);
7029 		}
7030 	}
7031 
7032 	rdev->ih.ring_obj = NULL;
7033 	r600_ih_ring_init(rdev, 64 * 1024);
7034 
7035 	r = r600_pcie_gart_init(rdev);
7036 	if (r)
7037 		return r;
7038 
7039 	rdev->accel_working = true;
7040 	r = si_startup(rdev);
7041 	if (r) {
7042 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7043 		si_cp_fini(rdev);
7044 		cayman_dma_fini(rdev);
7045 		si_irq_fini(rdev);
7046 		sumo_rlc_fini(rdev);
7047 		radeon_wb_fini(rdev);
7048 		radeon_ib_pool_fini(rdev);
7049 		radeon_vm_manager_fini(rdev);
7050 		radeon_irq_kms_fini(rdev);
7051 		si_pcie_gart_fini(rdev);
7052 		rdev->accel_working = false;
7053 	}
7054 
7055 	/* Don't start up if the MC ucode is missing.
7056 	 * The default clocks and voltages before the MC ucode
7057 	 * is loaded are not suffient for advanced operations.
7058 	 */
7059 	if (!rdev->mc_fw) {
7060 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7061 		return -EINVAL;
7062 	}
7063 
7064 	return 0;
7065 }
7066 
/**
 * si_fini - asic specific driver and hw teardown
 *
 * @rdev: radeon_device pointer
 *
 * Tears down everything si_init()/si_startup() set up: PM, the CP and
 * DMA engines, PG/CG, interrupts, RLC, writeback, the VM manager and
 * IB pool, UVD, GART, scratch memory, GEM, fences, the BO manager and
 * the ATOM BIOS context, then frees the cached BIOS image.
 * The order reverses the init sequence and must be preserved.
 */
void si_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	si_fini_microcode(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
7094 
7095 /**
7096  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7097  *
7098  * @rdev: radeon_device pointer
7099  *
7100  * Fetches a GPU clock counter snapshot (SI).
7101  * Returns the 64 bit clock counter snapshot.
7102  */
7103 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7104 {
7105 	uint64_t clock;
7106 
7107 	mutex_lock(&rdev->gpu_clock_mutex);
7108 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7109 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7110 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7111 	mutex_unlock(&rdev->gpu_clock_mutex);
7112 	return clock;
7113 }
7114 
/**
 * si_set_uvd_clocks - program the UVD PLL (UPLL) for the given clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock in kHz (0 = put the PLL to sleep)
 * @dclk: requested UVD decode clock in kHz (0 = put the PLL to sleep)
 *
 * Bypasses VCLK/DCLK onto BCLK, computes the feedback and post
 * dividers, reprograms and relocks the UPLL and finally switches the
 * clock sources back to the PLL output.  When either requested clock
 * is zero the PLL is left in bypass and put to sleep.
 * The register write sequence follows the hardware programming guide
 * and must not be reordered.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* VCO range 125000..250000 kHz, fb_div limits 16384/0x03FFFFFF,
	 * post divider limit 128, fb_div fractional resolution 5 bits */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
7205 
/**
 * si_pcie_gen3_enable - switch the PCIe link to the fastest supported speed
 *
 * @rdev: radeon_device pointer
 *
 * Trains the link to gen2 or gen3 where both the GPU and the upstream
 * bridge support it.  For gen3, equalization is retried up to 10 times
 * with hardware autonomous width/speed control temporarily disabled.
 * Skipped for IGP/non-PCIE parts or when the radeon.pcie_gen2 module
 * parameter is 0.  The config-space and port-register sequence follows
 * the hardware training procedure and must not be reordered.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* LC_CURRENT_DATA_RATE: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* both ends need a PCIe capability for the LNKCTL dance below */
	bridge_pos = pci_get_pciecap_ptr(root->dev.bsddev);
	if (!bridge_pos)
		return;

	gpu_pos = pci_get_pciecap_ptr(rdev->pdev->dev.bsddev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the HAWD (hw autonomous width disable) bits so
			 * they can be restored after each retraining pass */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to its detected maximum first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore saved "enter compliance" (bit 4)
				 * and "compliance preset/de-emphasis" (bits 9-11) */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed into LNKCTL2 (low nibble) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hw to clear the speed-change request */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7362 
/**
 * si_program_aspm - configure PCIe Active State Power Management
 *
 * @rdev: radeon_device pointer
 *
 * Programs L0s/L1 inactivity timers, PLL power-down behavior in L1,
 * dynamic lane power states and (when CLKREQ# is usable) the bypass
 * clock muxes.  Skipped for non-PCIE parts or when the radeon.aspm
 * module parameter is 0.  The disable_* locals act as compile-time
 * policy switches; all are currently false (everything enabled).
 * Registers are only written when the value actually changes.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the N_FTS (fast training sequence) count */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* program L0s/L1 inactivity; note this write is deferred to the
	 * !disable_l1 branch below (or the else branch if L1 is disabled) */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* power the PIF PHY PLLs fully down in OFF/TXS2 */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* zero the PLL ramp-up times on everything but Oland/Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			/* the LNKCAP CLKPM probe is not ported on this platform
			 * (zMN_TODO), so CLKREQ# support is always treated as
			 * absent for now */
			if (!disable_clkreq) {
#ifdef zMN_TODO
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
#else
				clk_req_support = false;
#endif
			} else {
				clk_req_support = false;
			}

			/* currently dead code until the CLKPM probe above is ported */
			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	/* turn L0s inactivity off again if N_FTS is saturated and the
	 * link is reversed in both directions */
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7570