/* xref: /dragonfly/sys/dev/drm/radeon/si.c (revision 820c5b08) */
/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include <uapi_drm/radeon_drm.h>
30 #include "sid.h"
31 #include "atom.h"
32 #include "si_blit_shaders.h"
33 #include "clearstate_si.h"
34 #include "radeon_ucode.h"
35 
36 
37 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
38 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
39 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
44 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
45 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
46 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
47 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
50 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
51 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
52 MODULE_FIRMWARE("radeon/VERDE_me.bin");
53 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
54 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
55 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
56 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
57 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
58 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
59 MODULE_FIRMWARE("radeon/OLAND_me.bin");
60 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
61 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
62 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
63 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
64 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
68 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
69 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
70 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
71 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
72 
73 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
74 static void si_pcie_gen3_enable(struct radeon_device *rdev);
75 static void si_program_aspm(struct radeon_device *rdev);
76 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
77 					 bool enable);
78 static void si_init_pg(struct radeon_device *rdev);
79 static void si_init_cg(struct radeon_device *rdev);
80 static void si_fini_pg(struct radeon_device *rdev);
81 static void si_fini_cg(struct radeon_device *rdev);
82 static void si_rlc_stop(struct radeon_device *rdev);
83 
84 static const u32 verde_rlc_save_restore_register_list[] =
85 {
86 	(0x8000 << 16) | (0x98f4 >> 2),
87 	0x00000000,
88 	(0x8040 << 16) | (0x98f4 >> 2),
89 	0x00000000,
90 	(0x8000 << 16) | (0xe80 >> 2),
91 	0x00000000,
92 	(0x8040 << 16) | (0xe80 >> 2),
93 	0x00000000,
94 	(0x8000 << 16) | (0x89bc >> 2),
95 	0x00000000,
96 	(0x8040 << 16) | (0x89bc >> 2),
97 	0x00000000,
98 	(0x8000 << 16) | (0x8c1c >> 2),
99 	0x00000000,
100 	(0x8040 << 16) | (0x8c1c >> 2),
101 	0x00000000,
102 	(0x9c00 << 16) | (0x98f0 >> 2),
103 	0x00000000,
104 	(0x9c00 << 16) | (0xe7c >> 2),
105 	0x00000000,
106 	(0x8000 << 16) | (0x9148 >> 2),
107 	0x00000000,
108 	(0x8040 << 16) | (0x9148 >> 2),
109 	0x00000000,
110 	(0x9c00 << 16) | (0x9150 >> 2),
111 	0x00000000,
112 	(0x9c00 << 16) | (0x897c >> 2),
113 	0x00000000,
114 	(0x9c00 << 16) | (0x8d8c >> 2),
115 	0x00000000,
116 	(0x9c00 << 16) | (0xac54 >> 2),
117 	0X00000000,
118 	0x3,
119 	(0x9c00 << 16) | (0x98f8 >> 2),
120 	0x00000000,
121 	(0x9c00 << 16) | (0x9910 >> 2),
122 	0x00000000,
123 	(0x9c00 << 16) | (0x9914 >> 2),
124 	0x00000000,
125 	(0x9c00 << 16) | (0x9918 >> 2),
126 	0x00000000,
127 	(0x9c00 << 16) | (0x991c >> 2),
128 	0x00000000,
129 	(0x9c00 << 16) | (0x9920 >> 2),
130 	0x00000000,
131 	(0x9c00 << 16) | (0x9924 >> 2),
132 	0x00000000,
133 	(0x9c00 << 16) | (0x9928 >> 2),
134 	0x00000000,
135 	(0x9c00 << 16) | (0x992c >> 2),
136 	0x00000000,
137 	(0x9c00 << 16) | (0x9930 >> 2),
138 	0x00000000,
139 	(0x9c00 << 16) | (0x9934 >> 2),
140 	0x00000000,
141 	(0x9c00 << 16) | (0x9938 >> 2),
142 	0x00000000,
143 	(0x9c00 << 16) | (0x993c >> 2),
144 	0x00000000,
145 	(0x9c00 << 16) | (0x9940 >> 2),
146 	0x00000000,
147 	(0x9c00 << 16) | (0x9944 >> 2),
148 	0x00000000,
149 	(0x9c00 << 16) | (0x9948 >> 2),
150 	0x00000000,
151 	(0x9c00 << 16) | (0x994c >> 2),
152 	0x00000000,
153 	(0x9c00 << 16) | (0x9950 >> 2),
154 	0x00000000,
155 	(0x9c00 << 16) | (0x9954 >> 2),
156 	0x00000000,
157 	(0x9c00 << 16) | (0x9958 >> 2),
158 	0x00000000,
159 	(0x9c00 << 16) | (0x995c >> 2),
160 	0x00000000,
161 	(0x9c00 << 16) | (0x9960 >> 2),
162 	0x00000000,
163 	(0x9c00 << 16) | (0x9964 >> 2),
164 	0x00000000,
165 	(0x9c00 << 16) | (0x9968 >> 2),
166 	0x00000000,
167 	(0x9c00 << 16) | (0x996c >> 2),
168 	0x00000000,
169 	(0x9c00 << 16) | (0x9970 >> 2),
170 	0x00000000,
171 	(0x9c00 << 16) | (0x9974 >> 2),
172 	0x00000000,
173 	(0x9c00 << 16) | (0x9978 >> 2),
174 	0x00000000,
175 	(0x9c00 << 16) | (0x997c >> 2),
176 	0x00000000,
177 	(0x9c00 << 16) | (0x9980 >> 2),
178 	0x00000000,
179 	(0x9c00 << 16) | (0x9984 >> 2),
180 	0x00000000,
181 	(0x9c00 << 16) | (0x9988 >> 2),
182 	0x00000000,
183 	(0x9c00 << 16) | (0x998c >> 2),
184 	0x00000000,
185 	(0x9c00 << 16) | (0x8c00 >> 2),
186 	0x00000000,
187 	(0x9c00 << 16) | (0x8c14 >> 2),
188 	0x00000000,
189 	(0x9c00 << 16) | (0x8c04 >> 2),
190 	0x00000000,
191 	(0x9c00 << 16) | (0x8c08 >> 2),
192 	0x00000000,
193 	(0x8000 << 16) | (0x9b7c >> 2),
194 	0x00000000,
195 	(0x8040 << 16) | (0x9b7c >> 2),
196 	0x00000000,
197 	(0x8000 << 16) | (0xe84 >> 2),
198 	0x00000000,
199 	(0x8040 << 16) | (0xe84 >> 2),
200 	0x00000000,
201 	(0x8000 << 16) | (0x89c0 >> 2),
202 	0x00000000,
203 	(0x8040 << 16) | (0x89c0 >> 2),
204 	0x00000000,
205 	(0x8000 << 16) | (0x914c >> 2),
206 	0x00000000,
207 	(0x8040 << 16) | (0x914c >> 2),
208 	0x00000000,
209 	(0x8000 << 16) | (0x8c20 >> 2),
210 	0x00000000,
211 	(0x8040 << 16) | (0x8c20 >> 2),
212 	0x00000000,
213 	(0x8000 << 16) | (0x9354 >> 2),
214 	0x00000000,
215 	(0x8040 << 16) | (0x9354 >> 2),
216 	0x00000000,
217 	(0x9c00 << 16) | (0x9060 >> 2),
218 	0x00000000,
219 	(0x9c00 << 16) | (0x9364 >> 2),
220 	0x00000000,
221 	(0x9c00 << 16) | (0x9100 >> 2),
222 	0x00000000,
223 	(0x9c00 << 16) | (0x913c >> 2),
224 	0x00000000,
225 	(0x8000 << 16) | (0x90e0 >> 2),
226 	0x00000000,
227 	(0x8000 << 16) | (0x90e4 >> 2),
228 	0x00000000,
229 	(0x8000 << 16) | (0x90e8 >> 2),
230 	0x00000000,
231 	(0x8040 << 16) | (0x90e0 >> 2),
232 	0x00000000,
233 	(0x8040 << 16) | (0x90e4 >> 2),
234 	0x00000000,
235 	(0x8040 << 16) | (0x90e8 >> 2),
236 	0x00000000,
237 	(0x9c00 << 16) | (0x8bcc >> 2),
238 	0x00000000,
239 	(0x9c00 << 16) | (0x8b24 >> 2),
240 	0x00000000,
241 	(0x9c00 << 16) | (0x88c4 >> 2),
242 	0x00000000,
243 	(0x9c00 << 16) | (0x8e50 >> 2),
244 	0x00000000,
245 	(0x9c00 << 16) | (0x8c0c >> 2),
246 	0x00000000,
247 	(0x9c00 << 16) | (0x8e58 >> 2),
248 	0x00000000,
249 	(0x9c00 << 16) | (0x8e5c >> 2),
250 	0x00000000,
251 	(0x9c00 << 16) | (0x9508 >> 2),
252 	0x00000000,
253 	(0x9c00 << 16) | (0x950c >> 2),
254 	0x00000000,
255 	(0x9c00 << 16) | (0x9494 >> 2),
256 	0x00000000,
257 	(0x9c00 << 16) | (0xac0c >> 2),
258 	0x00000000,
259 	(0x9c00 << 16) | (0xac10 >> 2),
260 	0x00000000,
261 	(0x9c00 << 16) | (0xac14 >> 2),
262 	0x00000000,
263 	(0x9c00 << 16) | (0xae00 >> 2),
264 	0x00000000,
265 	(0x9c00 << 16) | (0xac08 >> 2),
266 	0x00000000,
267 	(0x9c00 << 16) | (0x88d4 >> 2),
268 	0x00000000,
269 	(0x9c00 << 16) | (0x88c8 >> 2),
270 	0x00000000,
271 	(0x9c00 << 16) | (0x88cc >> 2),
272 	0x00000000,
273 	(0x9c00 << 16) | (0x89b0 >> 2),
274 	0x00000000,
275 	(0x9c00 << 16) | (0x8b10 >> 2),
276 	0x00000000,
277 	(0x9c00 << 16) | (0x8a14 >> 2),
278 	0x00000000,
279 	(0x9c00 << 16) | (0x9830 >> 2),
280 	0x00000000,
281 	(0x9c00 << 16) | (0x9834 >> 2),
282 	0x00000000,
283 	(0x9c00 << 16) | (0x9838 >> 2),
284 	0x00000000,
285 	(0x9c00 << 16) | (0x9a10 >> 2),
286 	0x00000000,
287 	(0x8000 << 16) | (0x9870 >> 2),
288 	0x00000000,
289 	(0x8000 << 16) | (0x9874 >> 2),
290 	0x00000000,
291 	(0x8001 << 16) | (0x9870 >> 2),
292 	0x00000000,
293 	(0x8001 << 16) | (0x9874 >> 2),
294 	0x00000000,
295 	(0x8040 << 16) | (0x9870 >> 2),
296 	0x00000000,
297 	(0x8040 << 16) | (0x9874 >> 2),
298 	0x00000000,
299 	(0x8041 << 16) | (0x9870 >> 2),
300 	0x00000000,
301 	(0x8041 << 16) | (0x9874 >> 2),
302 	0x00000000,
303 	0x00000000
304 };
305 
306 static const u32 tahiti_golden_rlc_registers[] =
307 {
308 	0xc424, 0xffffffff, 0x00601005,
309 	0xc47c, 0xffffffff, 0x10104040,
310 	0xc488, 0xffffffff, 0x0100000a,
311 	0xc314, 0xffffffff, 0x00000800,
312 	0xc30c, 0xffffffff, 0x800000f4,
313 	0xf4a8, 0xffffffff, 0x00000000
314 };
315 
316 static const u32 tahiti_golden_registers[] =
317 {
318 	0x9a10, 0x00010000, 0x00018208,
319 	0x9830, 0xffffffff, 0x00000000,
320 	0x9834, 0xf00fffff, 0x00000400,
321 	0x9838, 0x0002021c, 0x00020200,
322 	0xc78, 0x00000080, 0x00000000,
323 	0xd030, 0x000300c0, 0x00800040,
324 	0xd830, 0x000300c0, 0x00800040,
325 	0x5bb0, 0x000000f0, 0x00000070,
326 	0x5bc0, 0x00200000, 0x50100000,
327 	0x7030, 0x31000311, 0x00000011,
328 	0x277c, 0x00000003, 0x000007ff,
329 	0x240c, 0x000007ff, 0x00000000,
330 	0x8a14, 0xf000001f, 0x00000007,
331 	0x8b24, 0xffffffff, 0x00ffffff,
332 	0x8b10, 0x0000ff0f, 0x00000000,
333 	0x28a4c, 0x07ffffff, 0x4e000000,
334 	0x28350, 0x3f3f3fff, 0x2a00126a,
335 	0x30, 0x000000ff, 0x0040,
336 	0x34, 0x00000040, 0x00004040,
337 	0x9100, 0x07ffffff, 0x03000000,
338 	0x8e88, 0x01ff1f3f, 0x00000000,
339 	0x8e84, 0x01ff1f3f, 0x00000000,
340 	0x9060, 0x0000007f, 0x00000020,
341 	0x9508, 0x00010000, 0x00010000,
342 	0xac14, 0x00000200, 0x000002fb,
343 	0xac10, 0xffffffff, 0x0000543b,
344 	0xac0c, 0xffffffff, 0xa9210876,
345 	0x88d0, 0xffffffff, 0x000fff40,
346 	0x88d4, 0x0000001f, 0x00000010,
347 	0x1410, 0x20000000, 0x20fffed8,
348 	0x15c0, 0x000c0fc0, 0x000c0400
349 };
350 
351 static const u32 tahiti_golden_registers2[] =
352 {
353 	0xc64, 0x00000001, 0x00000001
354 };
355 
356 static const u32 pitcairn_golden_rlc_registers[] =
357 {
358 	0xc424, 0xffffffff, 0x00601004,
359 	0xc47c, 0xffffffff, 0x10102020,
360 	0xc488, 0xffffffff, 0x01000020,
361 	0xc314, 0xffffffff, 0x00000800,
362 	0xc30c, 0xffffffff, 0x800000a4
363 };
364 
365 static const u32 pitcairn_golden_registers[] =
366 {
367 	0x9a10, 0x00010000, 0x00018208,
368 	0x9830, 0xffffffff, 0x00000000,
369 	0x9834, 0xf00fffff, 0x00000400,
370 	0x9838, 0x0002021c, 0x00020200,
371 	0xc78, 0x00000080, 0x00000000,
372 	0xd030, 0x000300c0, 0x00800040,
373 	0xd830, 0x000300c0, 0x00800040,
374 	0x5bb0, 0x000000f0, 0x00000070,
375 	0x5bc0, 0x00200000, 0x50100000,
376 	0x7030, 0x31000311, 0x00000011,
377 	0x2ae4, 0x00073ffe, 0x000022a2,
378 	0x240c, 0x000007ff, 0x00000000,
379 	0x8a14, 0xf000001f, 0x00000007,
380 	0x8b24, 0xffffffff, 0x00ffffff,
381 	0x8b10, 0x0000ff0f, 0x00000000,
382 	0x28a4c, 0x07ffffff, 0x4e000000,
383 	0x28350, 0x3f3f3fff, 0x2a00126a,
384 	0x30, 0x000000ff, 0x0040,
385 	0x34, 0x00000040, 0x00004040,
386 	0x9100, 0x07ffffff, 0x03000000,
387 	0x9060, 0x0000007f, 0x00000020,
388 	0x9508, 0x00010000, 0x00010000,
389 	0xac14, 0x000003ff, 0x000000f7,
390 	0xac10, 0xffffffff, 0x00000000,
391 	0xac0c, 0xffffffff, 0x32761054,
392 	0x88d4, 0x0000001f, 0x00000010,
393 	0x15c0, 0x000c0fc0, 0x000c0400
394 };
395 
396 static const u32 verde_golden_rlc_registers[] =
397 {
398 	0xc424, 0xffffffff, 0x033f1005,
399 	0xc47c, 0xffffffff, 0x10808020,
400 	0xc488, 0xffffffff, 0x00800008,
401 	0xc314, 0xffffffff, 0x00001000,
402 	0xc30c, 0xffffffff, 0x80010014
403 };
404 
405 static const u32 verde_golden_registers[] =
406 {
407 	0x9a10, 0x00010000, 0x00018208,
408 	0x9830, 0xffffffff, 0x00000000,
409 	0x9834, 0xf00fffff, 0x00000400,
410 	0x9838, 0x0002021c, 0x00020200,
411 	0xc78, 0x00000080, 0x00000000,
412 	0xd030, 0x000300c0, 0x00800040,
413 	0xd030, 0x000300c0, 0x00800040,
414 	0xd830, 0x000300c0, 0x00800040,
415 	0xd830, 0x000300c0, 0x00800040,
416 	0x5bb0, 0x000000f0, 0x00000070,
417 	0x5bc0, 0x00200000, 0x50100000,
418 	0x7030, 0x31000311, 0x00000011,
419 	0x2ae4, 0x00073ffe, 0x000022a2,
420 	0x2ae4, 0x00073ffe, 0x000022a2,
421 	0x2ae4, 0x00073ffe, 0x000022a2,
422 	0x240c, 0x000007ff, 0x00000000,
423 	0x240c, 0x000007ff, 0x00000000,
424 	0x240c, 0x000007ff, 0x00000000,
425 	0x8a14, 0xf000001f, 0x00000007,
426 	0x8a14, 0xf000001f, 0x00000007,
427 	0x8a14, 0xf000001f, 0x00000007,
428 	0x8b24, 0xffffffff, 0x00ffffff,
429 	0x8b10, 0x0000ff0f, 0x00000000,
430 	0x28a4c, 0x07ffffff, 0x4e000000,
431 	0x28350, 0x3f3f3fff, 0x0000124a,
432 	0x28350, 0x3f3f3fff, 0x0000124a,
433 	0x28350, 0x3f3f3fff, 0x0000124a,
434 	0x30, 0x000000ff, 0x0040,
435 	0x34, 0x00000040, 0x00004040,
436 	0x9100, 0x07ffffff, 0x03000000,
437 	0x9100, 0x07ffffff, 0x03000000,
438 	0x8e88, 0x01ff1f3f, 0x00000000,
439 	0x8e88, 0x01ff1f3f, 0x00000000,
440 	0x8e88, 0x01ff1f3f, 0x00000000,
441 	0x8e84, 0x01ff1f3f, 0x00000000,
442 	0x8e84, 0x01ff1f3f, 0x00000000,
443 	0x8e84, 0x01ff1f3f, 0x00000000,
444 	0x9060, 0x0000007f, 0x00000020,
445 	0x9508, 0x00010000, 0x00010000,
446 	0xac14, 0x000003ff, 0x00000003,
447 	0xac14, 0x000003ff, 0x00000003,
448 	0xac14, 0x000003ff, 0x00000003,
449 	0xac10, 0xffffffff, 0x00000000,
450 	0xac10, 0xffffffff, 0x00000000,
451 	0xac10, 0xffffffff, 0x00000000,
452 	0xac0c, 0xffffffff, 0x00001032,
453 	0xac0c, 0xffffffff, 0x00001032,
454 	0xac0c, 0xffffffff, 0x00001032,
455 	0x88d4, 0x0000001f, 0x00000010,
456 	0x88d4, 0x0000001f, 0x00000010,
457 	0x88d4, 0x0000001f, 0x00000010,
458 	0x15c0, 0x000c0fc0, 0x000c0400
459 };
460 
461 static const u32 oland_golden_rlc_registers[] =
462 {
463 	0xc424, 0xffffffff, 0x00601005,
464 	0xc47c, 0xffffffff, 0x10104040,
465 	0xc488, 0xffffffff, 0x0100000a,
466 	0xc314, 0xffffffff, 0x00000800,
467 	0xc30c, 0xffffffff, 0x800000f4
468 };
469 
470 static const u32 oland_golden_registers[] =
471 {
472 	0x9a10, 0x00010000, 0x00018208,
473 	0x9830, 0xffffffff, 0x00000000,
474 	0x9834, 0xf00fffff, 0x00000400,
475 	0x9838, 0x0002021c, 0x00020200,
476 	0xc78, 0x00000080, 0x00000000,
477 	0xd030, 0x000300c0, 0x00800040,
478 	0xd830, 0x000300c0, 0x00800040,
479 	0x5bb0, 0x000000f0, 0x00000070,
480 	0x5bc0, 0x00200000, 0x50100000,
481 	0x7030, 0x31000311, 0x00000011,
482 	0x2ae4, 0x00073ffe, 0x000022a2,
483 	0x240c, 0x000007ff, 0x00000000,
484 	0x8a14, 0xf000001f, 0x00000007,
485 	0x8b24, 0xffffffff, 0x00ffffff,
486 	0x8b10, 0x0000ff0f, 0x00000000,
487 	0x28a4c, 0x07ffffff, 0x4e000000,
488 	0x28350, 0x3f3f3fff, 0x00000082,
489 	0x30, 0x000000ff, 0x0040,
490 	0x34, 0x00000040, 0x00004040,
491 	0x9100, 0x07ffffff, 0x03000000,
492 	0x9060, 0x0000007f, 0x00000020,
493 	0x9508, 0x00010000, 0x00010000,
494 	0xac14, 0x000003ff, 0x000000f3,
495 	0xac10, 0xffffffff, 0x00000000,
496 	0xac0c, 0xffffffff, 0x00003210,
497 	0x88d4, 0x0000001f, 0x00000010,
498 	0x15c0, 0x000c0fc0, 0x000c0400
499 };
500 
501 static const u32 hainan_golden_registers[] =
502 {
503 	0x9a10, 0x00010000, 0x00018208,
504 	0x9830, 0xffffffff, 0x00000000,
505 	0x9834, 0xf00fffff, 0x00000400,
506 	0x9838, 0x0002021c, 0x00020200,
507 	0xd0c0, 0xff000fff, 0x00000100,
508 	0xd030, 0x000300c0, 0x00800040,
509 	0xd8c0, 0xff000fff, 0x00000100,
510 	0xd830, 0x000300c0, 0x00800040,
511 	0x2ae4, 0x00073ffe, 0x000022a2,
512 	0x240c, 0x000007ff, 0x00000000,
513 	0x8a14, 0xf000001f, 0x00000007,
514 	0x8b24, 0xffffffff, 0x00ffffff,
515 	0x8b10, 0x0000ff0f, 0x00000000,
516 	0x28a4c, 0x07ffffff, 0x4e000000,
517 	0x28350, 0x3f3f3fff, 0x00000000,
518 	0x30, 0x000000ff, 0x0040,
519 	0x34, 0x00000040, 0x00004040,
520 	0x9100, 0x03e00000, 0x03600000,
521 	0x9060, 0x0000007f, 0x00000020,
522 	0x9508, 0x00010000, 0x00010000,
523 	0xac14, 0x000003ff, 0x000000f1,
524 	0xac10, 0xffffffff, 0x00000000,
525 	0xac0c, 0xffffffff, 0x00003210,
526 	0x88d4, 0x0000001f, 0x00000010,
527 	0x15c0, 0x000c0fc0, 0x000c0400
528 };
529 
530 static const u32 hainan_golden_registers2[] =
531 {
532 	0x98f8, 0xffffffff, 0x02010001
533 };
534 
535 static const u32 tahiti_mgcg_cgcg_init[] =
536 {
537 	0xc400, 0xffffffff, 0xfffffffc,
538 	0x802c, 0xffffffff, 0xe0000000,
539 	0x9a60, 0xffffffff, 0x00000100,
540 	0x92a4, 0xffffffff, 0x00000100,
541 	0xc164, 0xffffffff, 0x00000100,
542 	0x9774, 0xffffffff, 0x00000100,
543 	0x8984, 0xffffffff, 0x06000100,
544 	0x8a18, 0xffffffff, 0x00000100,
545 	0x92a0, 0xffffffff, 0x00000100,
546 	0xc380, 0xffffffff, 0x00000100,
547 	0x8b28, 0xffffffff, 0x00000100,
548 	0x9144, 0xffffffff, 0x00000100,
549 	0x8d88, 0xffffffff, 0x00000100,
550 	0x8d8c, 0xffffffff, 0x00000100,
551 	0x9030, 0xffffffff, 0x00000100,
552 	0x9034, 0xffffffff, 0x00000100,
553 	0x9038, 0xffffffff, 0x00000100,
554 	0x903c, 0xffffffff, 0x00000100,
555 	0xad80, 0xffffffff, 0x00000100,
556 	0xac54, 0xffffffff, 0x00000100,
557 	0x897c, 0xffffffff, 0x06000100,
558 	0x9868, 0xffffffff, 0x00000100,
559 	0x9510, 0xffffffff, 0x00000100,
560 	0xaf04, 0xffffffff, 0x00000100,
561 	0xae04, 0xffffffff, 0x00000100,
562 	0x949c, 0xffffffff, 0x00000100,
563 	0x802c, 0xffffffff, 0xe0000000,
564 	0x9160, 0xffffffff, 0x00010000,
565 	0x9164, 0xffffffff, 0x00030002,
566 	0x9168, 0xffffffff, 0x00040007,
567 	0x916c, 0xffffffff, 0x00060005,
568 	0x9170, 0xffffffff, 0x00090008,
569 	0x9174, 0xffffffff, 0x00020001,
570 	0x9178, 0xffffffff, 0x00040003,
571 	0x917c, 0xffffffff, 0x00000007,
572 	0x9180, 0xffffffff, 0x00060005,
573 	0x9184, 0xffffffff, 0x00090008,
574 	0x9188, 0xffffffff, 0x00030002,
575 	0x918c, 0xffffffff, 0x00050004,
576 	0x9190, 0xffffffff, 0x00000008,
577 	0x9194, 0xffffffff, 0x00070006,
578 	0x9198, 0xffffffff, 0x000a0009,
579 	0x919c, 0xffffffff, 0x00040003,
580 	0x91a0, 0xffffffff, 0x00060005,
581 	0x91a4, 0xffffffff, 0x00000009,
582 	0x91a8, 0xffffffff, 0x00080007,
583 	0x91ac, 0xffffffff, 0x000b000a,
584 	0x91b0, 0xffffffff, 0x00050004,
585 	0x91b4, 0xffffffff, 0x00070006,
586 	0x91b8, 0xffffffff, 0x0008000b,
587 	0x91bc, 0xffffffff, 0x000a0009,
588 	0x91c0, 0xffffffff, 0x000d000c,
589 	0x91c4, 0xffffffff, 0x00060005,
590 	0x91c8, 0xffffffff, 0x00080007,
591 	0x91cc, 0xffffffff, 0x0000000b,
592 	0x91d0, 0xffffffff, 0x000a0009,
593 	0x91d4, 0xffffffff, 0x000d000c,
594 	0x91d8, 0xffffffff, 0x00070006,
595 	0x91dc, 0xffffffff, 0x00090008,
596 	0x91e0, 0xffffffff, 0x0000000c,
597 	0x91e4, 0xffffffff, 0x000b000a,
598 	0x91e8, 0xffffffff, 0x000e000d,
599 	0x91ec, 0xffffffff, 0x00080007,
600 	0x91f0, 0xffffffff, 0x000a0009,
601 	0x91f4, 0xffffffff, 0x0000000d,
602 	0x91f8, 0xffffffff, 0x000c000b,
603 	0x91fc, 0xffffffff, 0x000f000e,
604 	0x9200, 0xffffffff, 0x00090008,
605 	0x9204, 0xffffffff, 0x000b000a,
606 	0x9208, 0xffffffff, 0x000c000f,
607 	0x920c, 0xffffffff, 0x000e000d,
608 	0x9210, 0xffffffff, 0x00110010,
609 	0x9214, 0xffffffff, 0x000a0009,
610 	0x9218, 0xffffffff, 0x000c000b,
611 	0x921c, 0xffffffff, 0x0000000f,
612 	0x9220, 0xffffffff, 0x000e000d,
613 	0x9224, 0xffffffff, 0x00110010,
614 	0x9228, 0xffffffff, 0x000b000a,
615 	0x922c, 0xffffffff, 0x000d000c,
616 	0x9230, 0xffffffff, 0x00000010,
617 	0x9234, 0xffffffff, 0x000f000e,
618 	0x9238, 0xffffffff, 0x00120011,
619 	0x923c, 0xffffffff, 0x000c000b,
620 	0x9240, 0xffffffff, 0x000e000d,
621 	0x9244, 0xffffffff, 0x00000011,
622 	0x9248, 0xffffffff, 0x0010000f,
623 	0x924c, 0xffffffff, 0x00130012,
624 	0x9250, 0xffffffff, 0x000d000c,
625 	0x9254, 0xffffffff, 0x000f000e,
626 	0x9258, 0xffffffff, 0x00100013,
627 	0x925c, 0xffffffff, 0x00120011,
628 	0x9260, 0xffffffff, 0x00150014,
629 	0x9264, 0xffffffff, 0x000e000d,
630 	0x9268, 0xffffffff, 0x0010000f,
631 	0x926c, 0xffffffff, 0x00000013,
632 	0x9270, 0xffffffff, 0x00120011,
633 	0x9274, 0xffffffff, 0x00150014,
634 	0x9278, 0xffffffff, 0x000f000e,
635 	0x927c, 0xffffffff, 0x00110010,
636 	0x9280, 0xffffffff, 0x00000014,
637 	0x9284, 0xffffffff, 0x00130012,
638 	0x9288, 0xffffffff, 0x00160015,
639 	0x928c, 0xffffffff, 0x0010000f,
640 	0x9290, 0xffffffff, 0x00120011,
641 	0x9294, 0xffffffff, 0x00000015,
642 	0x9298, 0xffffffff, 0x00140013,
643 	0x929c, 0xffffffff, 0x00170016,
644 	0x9150, 0xffffffff, 0x96940200,
645 	0x8708, 0xffffffff, 0x00900100,
646 	0xc478, 0xffffffff, 0x00000080,
647 	0xc404, 0xffffffff, 0x0020003f,
648 	0x30, 0xffffffff, 0x0000001c,
649 	0x34, 0x000f0000, 0x000f0000,
650 	0x160c, 0xffffffff, 0x00000100,
651 	0x1024, 0xffffffff, 0x00000100,
652 	0x102c, 0x00000101, 0x00000000,
653 	0x20a8, 0xffffffff, 0x00000104,
654 	0x264c, 0x000c0000, 0x000c0000,
655 	0x2648, 0x000c0000, 0x000c0000,
656 	0x55e4, 0xff000fff, 0x00000100,
657 	0x55e8, 0x00000001, 0x00000001,
658 	0x2f50, 0x00000001, 0x00000001,
659 	0x30cc, 0xc0000fff, 0x00000104,
660 	0xc1e4, 0x00000001, 0x00000001,
661 	0xd0c0, 0xfffffff0, 0x00000100,
662 	0xd8c0, 0xfffffff0, 0x00000100
663 };
664 
665 static const u32 pitcairn_mgcg_cgcg_init[] =
666 {
667 	0xc400, 0xffffffff, 0xfffffffc,
668 	0x802c, 0xffffffff, 0xe0000000,
669 	0x9a60, 0xffffffff, 0x00000100,
670 	0x92a4, 0xffffffff, 0x00000100,
671 	0xc164, 0xffffffff, 0x00000100,
672 	0x9774, 0xffffffff, 0x00000100,
673 	0x8984, 0xffffffff, 0x06000100,
674 	0x8a18, 0xffffffff, 0x00000100,
675 	0x92a0, 0xffffffff, 0x00000100,
676 	0xc380, 0xffffffff, 0x00000100,
677 	0x8b28, 0xffffffff, 0x00000100,
678 	0x9144, 0xffffffff, 0x00000100,
679 	0x8d88, 0xffffffff, 0x00000100,
680 	0x8d8c, 0xffffffff, 0x00000100,
681 	0x9030, 0xffffffff, 0x00000100,
682 	0x9034, 0xffffffff, 0x00000100,
683 	0x9038, 0xffffffff, 0x00000100,
684 	0x903c, 0xffffffff, 0x00000100,
685 	0xad80, 0xffffffff, 0x00000100,
686 	0xac54, 0xffffffff, 0x00000100,
687 	0x897c, 0xffffffff, 0x06000100,
688 	0x9868, 0xffffffff, 0x00000100,
689 	0x9510, 0xffffffff, 0x00000100,
690 	0xaf04, 0xffffffff, 0x00000100,
691 	0xae04, 0xffffffff, 0x00000100,
692 	0x949c, 0xffffffff, 0x00000100,
693 	0x802c, 0xffffffff, 0xe0000000,
694 	0x9160, 0xffffffff, 0x00010000,
695 	0x9164, 0xffffffff, 0x00030002,
696 	0x9168, 0xffffffff, 0x00040007,
697 	0x916c, 0xffffffff, 0x00060005,
698 	0x9170, 0xffffffff, 0x00090008,
699 	0x9174, 0xffffffff, 0x00020001,
700 	0x9178, 0xffffffff, 0x00040003,
701 	0x917c, 0xffffffff, 0x00000007,
702 	0x9180, 0xffffffff, 0x00060005,
703 	0x9184, 0xffffffff, 0x00090008,
704 	0x9188, 0xffffffff, 0x00030002,
705 	0x918c, 0xffffffff, 0x00050004,
706 	0x9190, 0xffffffff, 0x00000008,
707 	0x9194, 0xffffffff, 0x00070006,
708 	0x9198, 0xffffffff, 0x000a0009,
709 	0x919c, 0xffffffff, 0x00040003,
710 	0x91a0, 0xffffffff, 0x00060005,
711 	0x91a4, 0xffffffff, 0x00000009,
712 	0x91a8, 0xffffffff, 0x00080007,
713 	0x91ac, 0xffffffff, 0x000b000a,
714 	0x91b0, 0xffffffff, 0x00050004,
715 	0x91b4, 0xffffffff, 0x00070006,
716 	0x91b8, 0xffffffff, 0x0008000b,
717 	0x91bc, 0xffffffff, 0x000a0009,
718 	0x91c0, 0xffffffff, 0x000d000c,
719 	0x9200, 0xffffffff, 0x00090008,
720 	0x9204, 0xffffffff, 0x000b000a,
721 	0x9208, 0xffffffff, 0x000c000f,
722 	0x920c, 0xffffffff, 0x000e000d,
723 	0x9210, 0xffffffff, 0x00110010,
724 	0x9214, 0xffffffff, 0x000a0009,
725 	0x9218, 0xffffffff, 0x000c000b,
726 	0x921c, 0xffffffff, 0x0000000f,
727 	0x9220, 0xffffffff, 0x000e000d,
728 	0x9224, 0xffffffff, 0x00110010,
729 	0x9228, 0xffffffff, 0x000b000a,
730 	0x922c, 0xffffffff, 0x000d000c,
731 	0x9230, 0xffffffff, 0x00000010,
732 	0x9234, 0xffffffff, 0x000f000e,
733 	0x9238, 0xffffffff, 0x00120011,
734 	0x923c, 0xffffffff, 0x000c000b,
735 	0x9240, 0xffffffff, 0x000e000d,
736 	0x9244, 0xffffffff, 0x00000011,
737 	0x9248, 0xffffffff, 0x0010000f,
738 	0x924c, 0xffffffff, 0x00130012,
739 	0x9250, 0xffffffff, 0x000d000c,
740 	0x9254, 0xffffffff, 0x000f000e,
741 	0x9258, 0xffffffff, 0x00100013,
742 	0x925c, 0xffffffff, 0x00120011,
743 	0x9260, 0xffffffff, 0x00150014,
744 	0x9150, 0xffffffff, 0x96940200,
745 	0x8708, 0xffffffff, 0x00900100,
746 	0xc478, 0xffffffff, 0x00000080,
747 	0xc404, 0xffffffff, 0x0020003f,
748 	0x30, 0xffffffff, 0x0000001c,
749 	0x34, 0x000f0000, 0x000f0000,
750 	0x160c, 0xffffffff, 0x00000100,
751 	0x1024, 0xffffffff, 0x00000100,
752 	0x102c, 0x00000101, 0x00000000,
753 	0x20a8, 0xffffffff, 0x00000104,
754 	0x55e4, 0xff000fff, 0x00000100,
755 	0x55e8, 0x00000001, 0x00000001,
756 	0x2f50, 0x00000001, 0x00000001,
757 	0x30cc, 0xc0000fff, 0x00000104,
758 	0xc1e4, 0x00000001, 0x00000001,
759 	0xd0c0, 0xfffffff0, 0x00000100,
760 	0xd8c0, 0xfffffff0, 0x00000100
761 };
762 
763 static const u32 verde_mgcg_cgcg_init[] =
764 {
765 	0xc400, 0xffffffff, 0xfffffffc,
766 	0x802c, 0xffffffff, 0xe0000000,
767 	0x9a60, 0xffffffff, 0x00000100,
768 	0x92a4, 0xffffffff, 0x00000100,
769 	0xc164, 0xffffffff, 0x00000100,
770 	0x9774, 0xffffffff, 0x00000100,
771 	0x8984, 0xffffffff, 0x06000100,
772 	0x8a18, 0xffffffff, 0x00000100,
773 	0x92a0, 0xffffffff, 0x00000100,
774 	0xc380, 0xffffffff, 0x00000100,
775 	0x8b28, 0xffffffff, 0x00000100,
776 	0x9144, 0xffffffff, 0x00000100,
777 	0x8d88, 0xffffffff, 0x00000100,
778 	0x8d8c, 0xffffffff, 0x00000100,
779 	0x9030, 0xffffffff, 0x00000100,
780 	0x9034, 0xffffffff, 0x00000100,
781 	0x9038, 0xffffffff, 0x00000100,
782 	0x903c, 0xffffffff, 0x00000100,
783 	0xad80, 0xffffffff, 0x00000100,
784 	0xac54, 0xffffffff, 0x00000100,
785 	0x897c, 0xffffffff, 0x06000100,
786 	0x9868, 0xffffffff, 0x00000100,
787 	0x9510, 0xffffffff, 0x00000100,
788 	0xaf04, 0xffffffff, 0x00000100,
789 	0xae04, 0xffffffff, 0x00000100,
790 	0x949c, 0xffffffff, 0x00000100,
791 	0x802c, 0xffffffff, 0xe0000000,
792 	0x9160, 0xffffffff, 0x00010000,
793 	0x9164, 0xffffffff, 0x00030002,
794 	0x9168, 0xffffffff, 0x00040007,
795 	0x916c, 0xffffffff, 0x00060005,
796 	0x9170, 0xffffffff, 0x00090008,
797 	0x9174, 0xffffffff, 0x00020001,
798 	0x9178, 0xffffffff, 0x00040003,
799 	0x917c, 0xffffffff, 0x00000007,
800 	0x9180, 0xffffffff, 0x00060005,
801 	0x9184, 0xffffffff, 0x00090008,
802 	0x9188, 0xffffffff, 0x00030002,
803 	0x918c, 0xffffffff, 0x00050004,
804 	0x9190, 0xffffffff, 0x00000008,
805 	0x9194, 0xffffffff, 0x00070006,
806 	0x9198, 0xffffffff, 0x000a0009,
807 	0x919c, 0xffffffff, 0x00040003,
808 	0x91a0, 0xffffffff, 0x00060005,
809 	0x91a4, 0xffffffff, 0x00000009,
810 	0x91a8, 0xffffffff, 0x00080007,
811 	0x91ac, 0xffffffff, 0x000b000a,
812 	0x91b0, 0xffffffff, 0x00050004,
813 	0x91b4, 0xffffffff, 0x00070006,
814 	0x91b8, 0xffffffff, 0x0008000b,
815 	0x91bc, 0xffffffff, 0x000a0009,
816 	0x91c0, 0xffffffff, 0x000d000c,
817 	0x9200, 0xffffffff, 0x00090008,
818 	0x9204, 0xffffffff, 0x000b000a,
819 	0x9208, 0xffffffff, 0x000c000f,
820 	0x920c, 0xffffffff, 0x000e000d,
821 	0x9210, 0xffffffff, 0x00110010,
822 	0x9214, 0xffffffff, 0x000a0009,
823 	0x9218, 0xffffffff, 0x000c000b,
824 	0x921c, 0xffffffff, 0x0000000f,
825 	0x9220, 0xffffffff, 0x000e000d,
826 	0x9224, 0xffffffff, 0x00110010,
827 	0x9228, 0xffffffff, 0x000b000a,
828 	0x922c, 0xffffffff, 0x000d000c,
829 	0x9230, 0xffffffff, 0x00000010,
830 	0x9234, 0xffffffff, 0x000f000e,
831 	0x9238, 0xffffffff, 0x00120011,
832 	0x923c, 0xffffffff, 0x000c000b,
833 	0x9240, 0xffffffff, 0x000e000d,
834 	0x9244, 0xffffffff, 0x00000011,
835 	0x9248, 0xffffffff, 0x0010000f,
836 	0x924c, 0xffffffff, 0x00130012,
837 	0x9250, 0xffffffff, 0x000d000c,
838 	0x9254, 0xffffffff, 0x000f000e,
839 	0x9258, 0xffffffff, 0x00100013,
840 	0x925c, 0xffffffff, 0x00120011,
841 	0x9260, 0xffffffff, 0x00150014,
842 	0x9150, 0xffffffff, 0x96940200,
843 	0x8708, 0xffffffff, 0x00900100,
844 	0xc478, 0xffffffff, 0x00000080,
845 	0xc404, 0xffffffff, 0x0020003f,
846 	0x30, 0xffffffff, 0x0000001c,
847 	0x34, 0x000f0000, 0x000f0000,
848 	0x160c, 0xffffffff, 0x00000100,
849 	0x1024, 0xffffffff, 0x00000100,
850 	0x102c, 0x00000101, 0x00000000,
851 	0x20a8, 0xffffffff, 0x00000104,
852 	0x264c, 0x000c0000, 0x000c0000,
853 	0x2648, 0x000c0000, 0x000c0000,
854 	0x55e4, 0xff000fff, 0x00000100,
855 	0x55e8, 0x00000001, 0x00000001,
856 	0x2f50, 0x00000001, 0x00000001,
857 	0x30cc, 0xc0000fff, 0x00000104,
858 	0xc1e4, 0x00000001, 0x00000001,
859 	0xd0c0, 0xfffffff0, 0x00000100,
860 	0xd8c0, 0xfffffff0, 0x00000100
861 };
862 
863 static const u32 oland_mgcg_cgcg_init[] =
864 {
865 	0xc400, 0xffffffff, 0xfffffffc,
866 	0x802c, 0xffffffff, 0xe0000000,
867 	0x9a60, 0xffffffff, 0x00000100,
868 	0x92a4, 0xffffffff, 0x00000100,
869 	0xc164, 0xffffffff, 0x00000100,
870 	0x9774, 0xffffffff, 0x00000100,
871 	0x8984, 0xffffffff, 0x06000100,
872 	0x8a18, 0xffffffff, 0x00000100,
873 	0x92a0, 0xffffffff, 0x00000100,
874 	0xc380, 0xffffffff, 0x00000100,
875 	0x8b28, 0xffffffff, 0x00000100,
876 	0x9144, 0xffffffff, 0x00000100,
877 	0x8d88, 0xffffffff, 0x00000100,
878 	0x8d8c, 0xffffffff, 0x00000100,
879 	0x9030, 0xffffffff, 0x00000100,
880 	0x9034, 0xffffffff, 0x00000100,
881 	0x9038, 0xffffffff, 0x00000100,
882 	0x903c, 0xffffffff, 0x00000100,
883 	0xad80, 0xffffffff, 0x00000100,
884 	0xac54, 0xffffffff, 0x00000100,
885 	0x897c, 0xffffffff, 0x06000100,
886 	0x9868, 0xffffffff, 0x00000100,
887 	0x9510, 0xffffffff, 0x00000100,
888 	0xaf04, 0xffffffff, 0x00000100,
889 	0xae04, 0xffffffff, 0x00000100,
890 	0x949c, 0xffffffff, 0x00000100,
891 	0x802c, 0xffffffff, 0xe0000000,
892 	0x9160, 0xffffffff, 0x00010000,
893 	0x9164, 0xffffffff, 0x00030002,
894 	0x9168, 0xffffffff, 0x00040007,
895 	0x916c, 0xffffffff, 0x00060005,
896 	0x9170, 0xffffffff, 0x00090008,
897 	0x9174, 0xffffffff, 0x00020001,
898 	0x9178, 0xffffffff, 0x00040003,
899 	0x917c, 0xffffffff, 0x00000007,
900 	0x9180, 0xffffffff, 0x00060005,
901 	0x9184, 0xffffffff, 0x00090008,
902 	0x9188, 0xffffffff, 0x00030002,
903 	0x918c, 0xffffffff, 0x00050004,
904 	0x9190, 0xffffffff, 0x00000008,
905 	0x9194, 0xffffffff, 0x00070006,
906 	0x9198, 0xffffffff, 0x000a0009,
907 	0x919c, 0xffffffff, 0x00040003,
908 	0x91a0, 0xffffffff, 0x00060005,
909 	0x91a4, 0xffffffff, 0x00000009,
910 	0x91a8, 0xffffffff, 0x00080007,
911 	0x91ac, 0xffffffff, 0x000b000a,
912 	0x91b0, 0xffffffff, 0x00050004,
913 	0x91b4, 0xffffffff, 0x00070006,
914 	0x91b8, 0xffffffff, 0x0008000b,
915 	0x91bc, 0xffffffff, 0x000a0009,
916 	0x91c0, 0xffffffff, 0x000d000c,
917 	0x91c4, 0xffffffff, 0x00060005,
918 	0x91c8, 0xffffffff, 0x00080007,
919 	0x91cc, 0xffffffff, 0x0000000b,
920 	0x91d0, 0xffffffff, 0x000a0009,
921 	0x91d4, 0xffffffff, 0x000d000c,
922 	0x9150, 0xffffffff, 0x96940200,
923 	0x8708, 0xffffffff, 0x00900100,
924 	0xc478, 0xffffffff, 0x00000080,
925 	0xc404, 0xffffffff, 0x0020003f,
926 	0x30, 0xffffffff, 0x0000001c,
927 	0x34, 0x000f0000, 0x000f0000,
928 	0x160c, 0xffffffff, 0x00000100,
929 	0x1024, 0xffffffff, 0x00000100,
930 	0x102c, 0x00000101, 0x00000000,
931 	0x20a8, 0xffffffff, 0x00000104,
932 	0x264c, 0x000c0000, 0x000c0000,
933 	0x2648, 0x000c0000, 0x000c0000,
934 	0x55e4, 0xff000fff, 0x00000100,
935 	0x55e8, 0x00000001, 0x00000001,
936 	0x2f50, 0x00000001, 0x00000001,
937 	0x30cc, 0xc0000fff, 0x00000104,
938 	0xc1e4, 0x00000001, 0x00000001,
939 	0xd0c0, 0xfffffff0, 0x00000100,
940 	0xd8c0, 0xfffffff0, 0x00000100
941 };
942 
/* Hainan medium/coarse grain clock gating init table.  Each line is one
 * (register offset, mask, value) triple — layout implied by the 3-per-line
 * grouping and by how the sibling tables are consumed — programmed via
 * radeon_program_register_sequence() from si_init_golden_registers().
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1019 
1020 static u32 verde_pg_init[] =
1021 {
1022 	0x353c, 0xffffffff, 0x40000,
1023 	0x3538, 0xffffffff, 0x200010ff,
1024 	0x353c, 0xffffffff, 0x0,
1025 	0x353c, 0xffffffff, 0x0,
1026 	0x353c, 0xffffffff, 0x0,
1027 	0x353c, 0xffffffff, 0x0,
1028 	0x353c, 0xffffffff, 0x0,
1029 	0x353c, 0xffffffff, 0x7007,
1030 	0x3538, 0xffffffff, 0x300010ff,
1031 	0x353c, 0xffffffff, 0x0,
1032 	0x353c, 0xffffffff, 0x0,
1033 	0x353c, 0xffffffff, 0x0,
1034 	0x353c, 0xffffffff, 0x0,
1035 	0x353c, 0xffffffff, 0x0,
1036 	0x353c, 0xffffffff, 0x400000,
1037 	0x3538, 0xffffffff, 0x100010ff,
1038 	0x353c, 0xffffffff, 0x0,
1039 	0x353c, 0xffffffff, 0x0,
1040 	0x353c, 0xffffffff, 0x0,
1041 	0x353c, 0xffffffff, 0x0,
1042 	0x353c, 0xffffffff, 0x0,
1043 	0x353c, 0xffffffff, 0x120200,
1044 	0x3538, 0xffffffff, 0x500010ff,
1045 	0x353c, 0xffffffff, 0x0,
1046 	0x353c, 0xffffffff, 0x0,
1047 	0x353c, 0xffffffff, 0x0,
1048 	0x353c, 0xffffffff, 0x0,
1049 	0x353c, 0xffffffff, 0x0,
1050 	0x353c, 0xffffffff, 0x1e1e16,
1051 	0x3538, 0xffffffff, 0x600010ff,
1052 	0x353c, 0xffffffff, 0x0,
1053 	0x353c, 0xffffffff, 0x0,
1054 	0x353c, 0xffffffff, 0x0,
1055 	0x353c, 0xffffffff, 0x0,
1056 	0x353c, 0xffffffff, 0x0,
1057 	0x353c, 0xffffffff, 0x171f1e,
1058 	0x3538, 0xffffffff, 0x700010ff,
1059 	0x353c, 0xffffffff, 0x0,
1060 	0x353c, 0xffffffff, 0x0,
1061 	0x353c, 0xffffffff, 0x0,
1062 	0x353c, 0xffffffff, 0x0,
1063 	0x353c, 0xffffffff, 0x0,
1064 	0x353c, 0xffffffff, 0x0,
1065 	0x3538, 0xffffffff, 0x9ff,
1066 	0x3500, 0xffffffff, 0x0,
1067 	0x3504, 0xffffffff, 0x10000800,
1068 	0x3504, 0xffffffff, 0xf,
1069 	0x3504, 0xffffffff, 0xf,
1070 	0x3500, 0xffffffff, 0x4,
1071 	0x3504, 0xffffffff, 0x1000051e,
1072 	0x3504, 0xffffffff, 0xffff,
1073 	0x3504, 0xffffffff, 0xffff,
1074 	0x3500, 0xffffffff, 0x8,
1075 	0x3504, 0xffffffff, 0x80500,
1076 	0x3500, 0xffffffff, 0x12,
1077 	0x3504, 0xffffffff, 0x9050c,
1078 	0x3500, 0xffffffff, 0x1d,
1079 	0x3504, 0xffffffff, 0xb052c,
1080 	0x3500, 0xffffffff, 0x2a,
1081 	0x3504, 0xffffffff, 0x1053e,
1082 	0x3500, 0xffffffff, 0x2d,
1083 	0x3504, 0xffffffff, 0x10546,
1084 	0x3500, 0xffffffff, 0x30,
1085 	0x3504, 0xffffffff, 0xa054e,
1086 	0x3500, 0xffffffff, 0x3c,
1087 	0x3504, 0xffffffff, 0x1055f,
1088 	0x3500, 0xffffffff, 0x3f,
1089 	0x3504, 0xffffffff, 0x10567,
1090 	0x3500, 0xffffffff, 0x42,
1091 	0x3504, 0xffffffff, 0x1056f,
1092 	0x3500, 0xffffffff, 0x45,
1093 	0x3504, 0xffffffff, 0x10572,
1094 	0x3500, 0xffffffff, 0x48,
1095 	0x3504, 0xffffffff, 0x20575,
1096 	0x3500, 0xffffffff, 0x4c,
1097 	0x3504, 0xffffffff, 0x190801,
1098 	0x3500, 0xffffffff, 0x67,
1099 	0x3504, 0xffffffff, 0x1082a,
1100 	0x3500, 0xffffffff, 0x6a,
1101 	0x3504, 0xffffffff, 0x1b082d,
1102 	0x3500, 0xffffffff, 0x87,
1103 	0x3504, 0xffffffff, 0x310851,
1104 	0x3500, 0xffffffff, 0xba,
1105 	0x3504, 0xffffffff, 0x891,
1106 	0x3500, 0xffffffff, 0xbc,
1107 	0x3504, 0xffffffff, 0x893,
1108 	0x3500, 0xffffffff, 0xbe,
1109 	0x3504, 0xffffffff, 0x20895,
1110 	0x3500, 0xffffffff, 0xc2,
1111 	0x3504, 0xffffffff, 0x20899,
1112 	0x3500, 0xffffffff, 0xc6,
1113 	0x3504, 0xffffffff, 0x2089d,
1114 	0x3500, 0xffffffff, 0xca,
1115 	0x3504, 0xffffffff, 0x8a1,
1116 	0x3500, 0xffffffff, 0xcc,
1117 	0x3504, 0xffffffff, 0x8a3,
1118 	0x3500, 0xffffffff, 0xce,
1119 	0x3504, 0xffffffff, 0x308a5,
1120 	0x3500, 0xffffffff, 0xd3,
1121 	0x3504, 0xffffffff, 0x6d08cd,
1122 	0x3500, 0xffffffff, 0x142,
1123 	0x3504, 0xffffffff, 0x2000095a,
1124 	0x3504, 0xffffffff, 0x1,
1125 	0x3500, 0xffffffff, 0x144,
1126 	0x3504, 0xffffffff, 0x301f095b,
1127 	0x3500, 0xffffffff, 0x165,
1128 	0x3504, 0xffffffff, 0xc094d,
1129 	0x3500, 0xffffffff, 0x173,
1130 	0x3504, 0xffffffff, 0xf096d,
1131 	0x3500, 0xffffffff, 0x184,
1132 	0x3504, 0xffffffff, 0x15097f,
1133 	0x3500, 0xffffffff, 0x19b,
1134 	0x3504, 0xffffffff, 0xc0998,
1135 	0x3500, 0xffffffff, 0x1a9,
1136 	0x3504, 0xffffffff, 0x409a7,
1137 	0x3500, 0xffffffff, 0x1af,
1138 	0x3504, 0xffffffff, 0xcdc,
1139 	0x3500, 0xffffffff, 0x1b1,
1140 	0x3504, 0xffffffff, 0x800,
1141 	0x3508, 0xffffffff, 0x6c9b2000,
1142 	0x3510, 0xfc00, 0x2000,
1143 	0x3544, 0xffffffff, 0xfc0,
1144 	0x28d4, 0x00000100, 0x100
1145 };
1146 
/**
 * si_init_golden_registers - program per-family "golden" register settings
 *
 * @rdev: radeon_device pointer
 *
 * Writes the chip-specific register tables defined above through
 * radeon_program_register_sequence().  The tables are applied in a fixed
 * order per family (golden regs, RLC regs, clock-gating init, and for
 * Verde the power-gating init).  Unknown families are left untouched.
 */
static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		/* non-SI families: nothing to program */
		break;
	}
}
1215 
1216 #define PCIE_BUS_CLK                10000
1217 #define TCLK                        (PCIE_BUS_CLK / 10)
1218 
1219 /**
1220  * si_get_xclk - get the xclk
1221  *
1222  * @rdev: radeon_device pointer
1223  *
1224  * Returns the reference clock used by the gfx engine
1225  * (SI).
1226  */
1227 u32 si_get_xclk(struct radeon_device *rdev)
1228 {
1229         u32 reference_clock = rdev->clock.spll.reference_freq;
1230 	u32 tmp;
1231 
1232 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1233 	if (tmp & MUX_TCLK_TO_XCLK)
1234 		return TCLK;
1235 
1236 	tmp = RREG32(CG_CLKPIN_CNTL);
1237 	if (tmp & XTALIN_DIVIDE)
1238 		return reference_clock / 4;
1239 
1240 	return reference_clock;
1241 }
1242 
1243 /* get temperature in millidegrees */
1244 int si_get_temp(struct radeon_device *rdev)
1245 {
1246 	u32 temp;
1247 	int actual_temp = 0;
1248 
1249 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1250 		CTF_TEMP_SHIFT;
1251 
1252 	if (temp & 0x200)
1253 		actual_temp = 255;
1254 	else
1255 		actual_temp = temp & 0x1ff;
1256 
1257 	actual_temp = (actual_temp * 1000);
1258 
1259 	return actual_temp;
1260 }
1261 
#define TAHITI_IO_MC_REGS_SIZE 36

/* Tahiti MC IO debug table: 36 {index, data} pairs written to
 * MC_SEQ_IO_DEBUG_INDEX/DATA by si_mc_load_microcode().  The per-family
 * tables below are identical except for the final (0x9f) entry.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1302 
/* Pitcairn MC IO debug table ({index, data} pairs); differs from the
 * Tahiti table only in the final (0x9f) entry.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1341 
/* Verde MC IO debug table ({index, data} pairs); differs from the
 * Tahiti table only in the final (0x9f) entry.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1380 
/* Oland MC IO debug table ({index, data} pairs); differs from the
 * Tahiti table only in the final (0x9f) entry.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1419 
/* Hainan MC IO debug table ({index, data} pairs); differs from the
 * Tahiti table only in the final (0x9f) entry.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1458 
1459 /* ucode loading */
1460 int si_mc_load_microcode(struct radeon_device *rdev)
1461 {
1462 	const __be32 *fw_data;
1463 	u32 running, blackout = 0;
1464 	u32 *io_mc_regs;
1465 	int i, regs_size, ucode_size;
1466 
1467 	if (!rdev->mc_fw)
1468 		return -EINVAL;
1469 
1470 	ucode_size = rdev->mc_fw->datasize / 4;
1471 
1472 	switch (rdev->family) {
1473 	case CHIP_TAHITI:
1474 		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1475 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1476 		break;
1477 	case CHIP_PITCAIRN:
1478 		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1479 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1480 		break;
1481 	case CHIP_VERDE:
1482 	default:
1483 		io_mc_regs = (u32 *)&verde_io_mc_regs;
1484 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1485 		break;
1486 	case CHIP_OLAND:
1487 		io_mc_regs = (u32 *)&oland_io_mc_regs;
1488 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1489 		break;
1490 	case CHIP_HAINAN:
1491 		io_mc_regs = (u32 *)&hainan_io_mc_regs;
1492 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1493 		break;
1494 	}
1495 
1496 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1497 
1498 	if (running == 0) {
1499 		if (running) {
1500 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1501 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1502 		}
1503 
1504 		/* reset the engine and set to writable */
1505 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1506 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1507 
1508 		/* load mc io regs */
1509 		for (i = 0; i < regs_size; i++) {
1510 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1511 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1512 		}
1513 		/* load the MC ucode */
1514 		fw_data = (const __be32 *)rdev->mc_fw->data;
1515 		for (i = 0; i < ucode_size; i++)
1516 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1517 
1518 		/* put the engine back into the active state */
1519 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1520 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1521 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1522 
1523 		/* wait for training to complete */
1524 		for (i = 0; i < rdev->usec_timeout; i++) {
1525 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1526 				break;
1527 			udelay(1);
1528 		}
1529 		for (i = 0; i < rdev->usec_timeout; i++) {
1530 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1531 				break;
1532 			udelay(1);
1533 		}
1534 
1535 		if (running)
1536 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1537 	}
1538 
1539 	return 0;
1540 }
1541 
1542 static int si_init_microcode(struct radeon_device *rdev)
1543 {
1544 	const char *chip_name;
1545 	const char *rlc_chip_name;
1546 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1547 	size_t smc_req_size, mc2_req_size;
1548 	char fw_name[30];
1549 	int err;
1550 
1551 	DRM_DEBUG("\n");
1552 
1553 	switch (rdev->family) {
1554 	case CHIP_TAHITI:
1555 		chip_name = "TAHITI";
1556 		rlc_chip_name = "TAHITI";
1557 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1558 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1559 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1560 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1561 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1562 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1563 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1564 		break;
1565 	case CHIP_PITCAIRN:
1566 		chip_name = "PITCAIRN";
1567 		rlc_chip_name = "PITCAIRN";
1568 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1569 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1570 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1571 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1572 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1573 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1574 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1575 		break;
1576 	case CHIP_VERDE:
1577 		chip_name = "VERDE";
1578 		rlc_chip_name = "VERDE";
1579 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1580 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1581 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1582 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1583 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1584 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1585 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1586 		break;
1587 	case CHIP_OLAND:
1588 		chip_name = "OLAND";
1589 		rlc_chip_name = "OLAND";
1590 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1591 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1592 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1593 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1594 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1595 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1596 		break;
1597 	case CHIP_HAINAN:
1598 		chip_name = "HAINAN";
1599 		rlc_chip_name = "HAINAN";
1600 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1601 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1602 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1603 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1604 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1605 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1606 		break;
1607 	default: BUG();
1608 	}
1609 
1610 	DRM_INFO("Loading %s Microcode\n", chip_name);
1611 
1612 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1613 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1614 	if (err)
1615 		goto out;
1616 	if (rdev->pfp_fw->datasize != pfp_req_size) {
1617 		printk(KERN_ERR
1618 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1619 		       rdev->pfp_fw->datasize, fw_name);
1620 		err = -EINVAL;
1621 		goto out;
1622 	}
1623 
1624 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
1625 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1626 	if (err)
1627 		goto out;
1628 	if (rdev->me_fw->datasize != me_req_size) {
1629 		printk(KERN_ERR
1630 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1631 		       rdev->me_fw->datasize, fw_name);
1632 		err = -EINVAL;
1633 	}
1634 
1635 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
1636 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1637 	if (err)
1638 		goto out;
1639 	if (rdev->ce_fw->datasize != ce_req_size) {
1640 		printk(KERN_ERR
1641 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1642 		       rdev->ce_fw->datasize, fw_name);
1643 		err = -EINVAL;
1644 	}
1645 
1646 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", rlc_chip_name);
1647 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1648 	if (err)
1649 		goto out;
1650 	if (rdev->rlc_fw->datasize != rlc_req_size) {
1651 		printk(KERN_ERR
1652 		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1653 		       rdev->rlc_fw->datasize, fw_name);
1654 		err = -EINVAL;
1655 	}
1656 
1657 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
1658 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1659 	if (err) {
1660 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
1661 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1662 		if (err)
1663 			goto out;
1664 	}
1665 	if ((rdev->mc_fw->datasize != mc_req_size) &&
1666 	    (rdev->mc_fw->datasize != mc2_req_size)) {
1667 		printk(KERN_ERR
1668 		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1669 		       rdev->mc_fw->datasize, fw_name);
1670 		err = -EINVAL;
1671 	}
1672 	DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
1673 
1674 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
1675 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1676 	if (err) {
1677 		printk(KERN_ERR
1678 		       "smc: error loading firmware \"%s\"\n",
1679 		       fw_name);
1680 		release_firmware(rdev->smc_fw);
1681 		rdev->smc_fw = NULL;
1682 		err = 0;
1683 	} else if (rdev->smc_fw->datasize != smc_req_size) {
1684 		printk(KERN_ERR
1685 		       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1686 		       rdev->smc_fw->datasize, fw_name);
1687 		err = -EINVAL;
1688 	}
1689 
1690 out:
1691 	if (err) {
1692 		if (err != -EINVAL)
1693 			printk(KERN_ERR
1694 			       "si_cp: Failed to load firmware \"%s\"\n",
1695 			       fw_name);
1696 		release_firmware(rdev->pfp_fw);
1697 		rdev->pfp_fw = NULL;
1698 		release_firmware(rdev->me_fw);
1699 		rdev->me_fw = NULL;
1700 		release_firmware(rdev->ce_fw);
1701 		rdev->ce_fw = NULL;
1702 		release_firmware(rdev->rlc_fw);
1703 		rdev->rlc_fw = NULL;
1704 		release_firmware(rdev->mc_fw);
1705 		rdev->mc_fw = NULL;
1706 		release_firmware(rdev->smc_fw);
1707 		rdev->smc_fw = NULL;
1708 	}
1709 	return err;
1710 }
1711 
1712 /**
1713  * si_fini_microcode - drop the firmwares image references
1714  *
1715  * @rdev: radeon_device pointer
1716  *
1717  * Drop the pfp, me, rlc, mc and ce firmware image references.
1718  * Called at driver shutdown.
1719  */
1720 static void si_fini_microcode(struct radeon_device *rdev)
1721 {
1722 	release_firmware(rdev->pfp_fw);
1723 	rdev->pfp_fw = NULL;
1724 	release_firmware(rdev->me_fw);
1725 	rdev->me_fw = NULL;
1726 	release_firmware(rdev->rlc_fw);
1727 	rdev->rlc_fw = NULL;
1728 	release_firmware(rdev->mc_fw);
1729 	rdev->mc_fw = NULL;
1730 	release_firmware(rdev->smc_fw);
1731 	rdev->smc_fw = NULL;
1732 	release_firmware(rdev->ce_fw);
1733 	rdev->ce_fw = NULL;
1734 }
1735 
1736 /* watermark setup */
1737 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1738 				   struct radeon_crtc *radeon_crtc,
1739 				   struct drm_display_mode *mode,
1740 				   struct drm_display_mode *other_mode)
1741 {
1742 	u32 tmp, buffer_alloc, i;
1743 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1744 	/*
1745 	 * Line Buffer Setup
1746 	 * There are 3 line buffers, each one shared by 2 display controllers.
1747 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1748 	 * the display controllers.  The paritioning is done via one of four
1749 	 * preset allocations specified in bits 21:20:
1750 	 *  0 - half lb
1751 	 *  2 - whole lb, other crtc must be disabled
1752 	 */
1753 	/* this can get tricky if we have two large displays on a paired group
1754 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1755 	 * non-linked crtcs for maximum line buffer allocation.
1756 	 */
1757 	if (radeon_crtc->base.enabled && mode) {
1758 		if (other_mode) {
1759 			tmp = 0; /* 1/2 */
1760 			buffer_alloc = 1;
1761 		} else {
1762 			tmp = 2; /* whole */
1763 			buffer_alloc = 2;
1764 		}
1765 	} else {
1766 		tmp = 0;
1767 		buffer_alloc = 0;
1768 	}
1769 
1770 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1771 	       DC_LB_MEMORY_CONFIG(tmp));
1772 
1773 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1774 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1775 	for (i = 0; i < rdev->usec_timeout; i++) {
1776 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1777 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1778 			break;
1779 		udelay(1);
1780 	}
1781 
1782 	if (radeon_crtc->base.enabled && mode) {
1783 		switch (tmp) {
1784 		case 0:
1785 		default:
1786 			return 4096 * 2;
1787 		case 2:
1788 			return 8192 * 2;
1789 		}
1790 	}
1791 
1792 	/* controller not enabled, so no lb used */
1793 	return 0;
1794 }
1795 
1796 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1797 {
1798 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1799 
1800 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1801 	case 0:
1802 	default:
1803 		return 1;
1804 	case 1:
1805 		return 2;
1806 	case 2:
1807 		return 4;
1808 	case 3:
1809 		return 8;
1810 	case 4:
1811 		return 3;
1812 	case 5:
1813 		return 6;
1814 	case 6:
1815 		return 10;
1816 	case 7:
1817 		return 12;
1818 	case 8:
1819 		return 16;
1820 	}
1821 }
1822 
/* Input parameters for the DCE6 display watermark/bandwidth
 * calculations below.
 */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1838 
1839 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1840 {
1841 	/* Calculate raw DRAM Bandwidth */
1842 	fixed20_12 dram_efficiency; /* 0.7 */
1843 	fixed20_12 yclk, dram_channels, bandwidth;
1844 	fixed20_12 a;
1845 
1846 	a.full = dfixed_const(1000);
1847 	yclk.full = dfixed_const(wm->yclk);
1848 	yclk.full = dfixed_div(yclk, a);
1849 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1850 	a.full = dfixed_const(10);
1851 	dram_efficiency.full = dfixed_const(7);
1852 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1853 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1854 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1855 
1856 	return dfixed_trunc(bandwidth);
1857 }
1858 
1859 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1860 {
1861 	/* Calculate DRAM Bandwidth and the part allocated to display. */
1862 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1863 	fixed20_12 yclk, dram_channels, bandwidth;
1864 	fixed20_12 a;
1865 
1866 	a.full = dfixed_const(1000);
1867 	yclk.full = dfixed_const(wm->yclk);
1868 	yclk.full = dfixed_div(yclk, a);
1869 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1870 	a.full = dfixed_const(10);
1871 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1872 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1873 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1874 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1875 
1876 	return dfixed_trunc(bandwidth);
1877 }
1878 
1879 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1880 {
1881 	/* Calculate the display Data return Bandwidth */
1882 	fixed20_12 return_efficiency; /* 0.8 */
1883 	fixed20_12 sclk, bandwidth;
1884 	fixed20_12 a;
1885 
1886 	a.full = dfixed_const(1000);
1887 	sclk.full = dfixed_const(wm->sclk);
1888 	sclk.full = dfixed_div(sclk, a);
1889 	a.full = dfixed_const(10);
1890 	return_efficiency.full = dfixed_const(8);
1891 	return_efficiency.full = dfixed_div(return_efficiency, a);
1892 	a.full = dfixed_const(32);
1893 	bandwidth.full = dfixed_mul(a, sclk);
1894 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1895 
1896 	return dfixed_trunc(bandwidth);
1897 }
1898 
/* DMIF request size used by the bandwidth calculations: fixed at 32 bytes. */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
1903 
1904 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1905 {
1906 	/* Calculate the DMIF Request Bandwidth */
1907 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1908 	fixed20_12 disp_clk, sclk, bandwidth;
1909 	fixed20_12 a, b1, b2;
1910 	u32 min_bandwidth;
1911 
1912 	a.full = dfixed_const(1000);
1913 	disp_clk.full = dfixed_const(wm->disp_clk);
1914 	disp_clk.full = dfixed_div(disp_clk, a);
1915 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1916 	b1.full = dfixed_mul(a, disp_clk);
1917 
1918 	a.full = dfixed_const(1000);
1919 	sclk.full = dfixed_const(wm->sclk);
1920 	sclk.full = dfixed_div(sclk, a);
1921 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1922 	b2.full = dfixed_mul(a, sclk);
1923 
1924 	a.full = dfixed_const(10);
1925 	disp_clk_request_efficiency.full = dfixed_const(8);
1926 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1927 
1928 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1929 
1930 	a.full = dfixed_const(min_bandwidth);
1931 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1932 
1933 	return dfixed_trunc(bandwidth);
1934 }
1935 
1936 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1937 {
1938 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1939 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1940 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1941 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1942 
1943 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1944 }
1945 
1946 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1947 {
1948 	/* Calculate the display mode Average Bandwidth
1949 	 * DisplayMode should contain the source and destination dimensions,
1950 	 * timing, etc.
1951 	 */
1952 	fixed20_12 bpp;
1953 	fixed20_12 line_time;
1954 	fixed20_12 src_width;
1955 	fixed20_12 bandwidth;
1956 	fixed20_12 a;
1957 
1958 	a.full = dfixed_const(1000);
1959 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1960 	line_time.full = dfixed_div(line_time, a);
1961 	bpp.full = dfixed_const(wm->bytes_per_pixel);
1962 	src_width.full = dfixed_const(wm->src_width);
1963 	bandwidth.full = dfixed_mul(src_width, bpp);
1964 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1965 	bandwidth.full = dfixed_div(bandwidth, line_time);
1966 
1967 	return dfixed_trunc(bandwidth);
1968 }
1969 
/* Estimate the latency (in ns) that the watermark for this head must
 * cover, adding any shortfall when a line cannot be filled within the
 * active period.  Returns 0 when no heads are active.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* NOTE(review): these divisions run before the num_heads check;
	 * they assume available_bandwidth != 0 — TODO confirm callers
	 * guarantee a nonzero clock configuration. */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* worst-case wait while the other heads drain their requests */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* scaling/interlace cases that need up to 4 source lines per
	 * destination line; otherwise 2 lines suffice */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk):
	 * rate at which the DMIF buffer can absorb data over the
	 * latency window */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = disp_clk / 1000 * bytes_per_pixel: pixel consumption rate */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill rate is the minimum of the three limits */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fetch one destination line's worth of source data */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the line cannot be filled within the active period, the
	 * shortfall adds to the latency the watermark must hide */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2032 
2033 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2034 {
2035 	if (dce6_average_bandwidth(wm) <=
2036 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2037 		return true;
2038 	else
2039 		return false;
2040 };
2041 
2042 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2043 {
2044 	if (dce6_average_bandwidth(wm) <=
2045 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2046 		return true;
2047 	else
2048 		return false;
2049 };
2050 
2051 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2052 {
2053 	u32 lb_partitions = wm->lb_size / wm->src_width;
2054 	u32 line_time = wm->active_time + wm->blank_time;
2055 	u32 latency_tolerant_lines;
2056 	u32 latency_hiding;
2057 	fixed20_12 a;
2058 
2059 	a.full = dfixed_const(1);
2060 	if (wm->vsc.full > a.full)
2061 		latency_tolerant_lines = 1;
2062 	else {
2063 		if (lb_partitions <= (wm->vtaps + 1))
2064 			latency_tolerant_lines = 1;
2065 		else
2066 			latency_tolerant_lines = 2;
2067 	}
2068 
2069 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2070 
2071 	if (dce6_latency_watermark(wm) <= latency_hiding)
2072 		return true;
2073 	else
2074 		return false;
2075 }
2076 
/* Program the DCE6 latency watermarks (wm A for high clocks, wm B for
 * low clocks) and the priority marks for one crtc, based on bandwidth
 * estimates for both clock levels.  lb_size is this crtc's share of the
 * line buffer; num_heads is the number of enabled crtcs.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* mode->clock is presumably in kHz, making pixel_period
		 * and line_time nanoseconds — TODO confirm */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a * clock * hsc / (1000 * 1000 * 16) */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same computation with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2236 
2237 void dce6_bandwidth_update(struct radeon_device *rdev)
2238 {
2239 	struct drm_display_mode *mode0 = NULL;
2240 	struct drm_display_mode *mode1 = NULL;
2241 	u32 num_heads = 0, lb_size;
2242 	int i;
2243 
2244 	if (!rdev->mode_info.mode_config_initialized)
2245 		return;
2246 
2247 	radeon_update_display_priority(rdev);
2248 
2249 	for (i = 0; i < rdev->num_crtc; i++) {
2250 		if (rdev->mode_info.crtcs[i]->base.enabled)
2251 			num_heads++;
2252 	}
2253 	for (i = 0; i < rdev->num_crtc; i += 2) {
2254 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2255 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2256 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2257 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2258 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2259 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2260 	}
2261 }
2262 
2263 /*
2264  * Core functions
2265  */
2266 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2267 {
2268 	const u32 num_tile_mode_states = 32;
2269 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2270 
2271 	switch (rdev->config.si.mem_row_size_in_kb) {
2272 	case 1:
2273 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2274 		break;
2275 	case 2:
2276 	default:
2277 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2278 		break;
2279 	case 4:
2280 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2281 		break;
2282 	}
2283 
2284 	if ((rdev->family == CHIP_TAHITI) ||
2285 	    (rdev->family == CHIP_PITCAIRN)) {
2286 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2287 			switch (reg_offset) {
2288 			case 0:  /* non-AA compressed depth or any compressed stencil */
2289 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2290 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2291 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2292 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2293 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2294 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2296 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2297 				break;
2298 			case 1:  /* 2xAA/4xAA compressed depth only */
2299 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2300 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2301 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2302 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2303 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2304 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2306 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2307 				break;
2308 			case 2:  /* 8xAA compressed depth only */
2309 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2310 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2311 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2312 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2313 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2314 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2315 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2316 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2317 				break;
2318 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2319 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2321 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2322 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2323 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2324 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2325 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2326 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2327 				break;
2328 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2329 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2330 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2331 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2332 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2333 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2334 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2336 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2337 				break;
2338 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2339 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2341 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2342 						 TILE_SPLIT(split_equal_to_row_size) |
2343 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2344 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2345 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2346 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2347 				break;
2348 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2349 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2350 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2351 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2352 						 TILE_SPLIT(split_equal_to_row_size) |
2353 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2354 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2356 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2357 				break;
2358 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2359 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2361 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2362 						 TILE_SPLIT(split_equal_to_row_size) |
2363 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2364 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2366 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2367 				break;
2368 			case 8:  /* 1D and 1D Array Surfaces */
2369 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2370 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2371 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2372 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2373 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2374 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2376 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2377 				break;
2378 			case 9:  /* Displayable maps. */
2379 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2380 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2381 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2382 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2383 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2384 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2385 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2386 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2387 				break;
2388 			case 10:  /* Display 8bpp. */
2389 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2391 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2392 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2393 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2394 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2395 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2396 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2397 				break;
2398 			case 11:  /* Display 16bpp. */
2399 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2401 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2402 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2403 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2404 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2405 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2406 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2407 				break;
2408 			case 12:  /* Display 32bpp. */
2409 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2410 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2411 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2412 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2413 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2414 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2416 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2417 				break;
2418 			case 13:  /* Thin. */
2419 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2420 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2421 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2422 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2423 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2424 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2425 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2426 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2427 				break;
2428 			case 14:  /* Thin 8 bpp. */
2429 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2431 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2432 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2433 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2434 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2436 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2437 				break;
2438 			case 15:  /* Thin 16 bpp. */
2439 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2440 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2441 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2442 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2443 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2444 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2446 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2447 				break;
2448 			case 16:  /* Thin 32 bpp. */
2449 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2450 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2451 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2452 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2453 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2454 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2456 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2457 				break;
2458 			case 17:  /* Thin 64 bpp. */
2459 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2460 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2461 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2462 						 TILE_SPLIT(split_equal_to_row_size) |
2463 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2464 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2467 				break;
2468 			case 21:  /* 8 bpp PRT. */
2469 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2470 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2471 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2472 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2473 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2474 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2475 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2476 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2477 				break;
2478 			case 22:  /* 16 bpp PRT */
2479 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2481 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2482 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2483 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2484 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2486 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2487 				break;
2488 			case 23:  /* 32 bpp PRT */
2489 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2490 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2491 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2492 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2493 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2494 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2496 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2497 				break;
2498 			case 24:  /* 64 bpp PRT */
2499 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2500 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2501 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2502 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2503 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2504 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2507 				break;
2508 			case 25:  /* 128 bpp PRT */
2509 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2510 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2511 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2512 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2513 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2514 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2516 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2517 				break;
2518 			default:
2519 				gb_tile_moden = 0;
2520 				break;
2521 			}
2522 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2523 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2524 		}
2525 	} else if ((rdev->family == CHIP_VERDE) ||
2526 		   (rdev->family == CHIP_OLAND) ||
2527 		   (rdev->family == CHIP_HAINAN)) {
2528 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2529 			switch (reg_offset) {
2530 			case 0:  /* non-AA compressed depth or any compressed stencil */
2531 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2532 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2533 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2534 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2535 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2536 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2538 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2539 				break;
2540 			case 1:  /* 2xAA/4xAA compressed depth only */
2541 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2543 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2544 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2545 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2546 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2548 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2549 				break;
2550 			case 2:  /* 8xAA compressed depth only */
2551 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2553 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2554 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2555 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2556 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2558 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2559 				break;
2560 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2561 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2563 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2564 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2565 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2566 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2568 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2569 				break;
2570 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2571 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2572 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2573 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2574 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2575 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2576 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2577 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2578 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2579 				break;
2580 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2581 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2582 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2583 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2584 						 TILE_SPLIT(split_equal_to_row_size) |
2585 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2586 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2587 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2588 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2589 				break;
2590 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2591 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2592 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2593 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2594 						 TILE_SPLIT(split_equal_to_row_size) |
2595 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2596 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2598 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2599 				break;
2600 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2601 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2602 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2603 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2604 						 TILE_SPLIT(split_equal_to_row_size) |
2605 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2606 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2608 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2609 				break;
2610 			case 8:  /* 1D and 1D Array Surfaces */
2611 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2612 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2613 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2614 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2615 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2616 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2618 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2619 				break;
2620 			case 9:  /* Displayable maps. */
2621 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2622 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2623 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2624 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2625 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2626 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2628 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2629 				break;
2630 			case 10:  /* Display 8bpp. */
2631 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2632 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2633 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2634 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2635 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2636 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2638 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2639 				break;
2640 			case 11:  /* Display 16bpp. */
2641 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2642 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2643 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2644 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2645 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2646 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2648 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2649 				break;
2650 			case 12:  /* Display 32bpp. */
2651 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2653 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2654 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2655 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2656 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2658 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2659 				break;
2660 			case 13:  /* Thin. */
2661 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2662 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2663 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2664 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2665 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2666 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2668 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2669 				break;
2670 			case 14:  /* Thin 8 bpp. */
2671 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2672 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2673 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2674 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2675 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2676 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2678 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2679 				break;
2680 			case 15:  /* Thin 16 bpp. */
2681 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2683 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2684 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2685 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2686 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2687 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2688 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2689 				break;
2690 			case 16:  /* Thin 32 bpp. */
2691 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2693 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2694 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2695 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2696 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2698 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2699 				break;
2700 			case 17:  /* Thin 64 bpp. */
2701 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2703 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2704 						 TILE_SPLIT(split_equal_to_row_size) |
2705 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2706 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2707 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2708 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2709 				break;
2710 			case 21:  /* 8 bpp PRT. */
2711 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2712 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2713 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2714 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2715 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2716 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2717 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2718 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2719 				break;
2720 			case 22:  /* 16 bpp PRT */
2721 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2722 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2723 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2724 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2725 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2726 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2729 				break;
2730 			case 23:  /* 32 bpp PRT */
2731 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2732 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2733 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2734 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2735 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2736 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2738 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2739 				break;
2740 			case 24:  /* 64 bpp PRT */
2741 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2743 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2744 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2745 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2746 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2748 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2749 				break;
2750 			case 25:  /* 128 bpp PRT */
2751 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2752 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2753 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2754 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2755 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2756 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2759 				break;
2760 			default:
2761 				gb_tile_moden = 0;
2762 				break;
2763 			}
2764 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2765 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2766 		}
2767 	} else
2768 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2769 }
2770 
2771 static void si_select_se_sh(struct radeon_device *rdev,
2772 			    u32 se_num, u32 sh_num)
2773 {
2774 	u32 data = INSTANCE_BROADCAST_WRITES;
2775 
2776 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2777 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2778 	else if (se_num == 0xffffffff)
2779 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2780 	else if (sh_num == 0xffffffff)
2781 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2782 	else
2783 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2784 	WREG32(GRBM_GFX_INDEX, data);
2785 }
2786 
2787 static u32 si_create_bitmask(u32 bit_width)
2788 {
2789 	u32 i, mask = 0;
2790 
2791 	for (i = 0; i < bit_width; i++) {
2792 		mask <<= 1;
2793 		mask |= 1;
2794 	}
2795 	return mask;
2796 }
2797 
/*
 * si_get_cu_enabled - bitmap of active compute units for the current SA
 * @rdev: radeon device
 * @cu_per_sh: maximum number of CUs per shader array on this asic
 *
 * Merges the hard (CC_GC_SHADER_ARRAY_CONFIG) and user
 * (GC_USER_SHADER_ARRAY_CONFIG) inactive-CU fields and returns the
 * complement masked to @cu_per_sh bits: a set bit means the CU is active.
 * Relies on the caller having routed register access to the desired SE/SH
 * via si_select_se_sh().
 */
static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
{
	u32 data, mask;

	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	if (data & 1)
		/* NOTE(review): bit 0 appears to gate validity of the
		 * inactive-CU field — confirm against the register spec */
		data &= INACTIVE_CUS_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);

	data >>= INACTIVE_CUS_SHIFT;

	mask = si_create_bitmask(cu_per_sh);

	/* the registers describe *inactive* CUs; invert to get the active set */
	return ~data & mask;
}
2815 
2816 static void si_setup_spi(struct radeon_device *rdev,
2817 			 u32 se_num, u32 sh_per_se,
2818 			 u32 cu_per_sh)
2819 {
2820 	int i, j, k;
2821 	u32 data, mask, active_cu;
2822 
2823 	for (i = 0; i < se_num; i++) {
2824 		for (j = 0; j < sh_per_se; j++) {
2825 			si_select_se_sh(rdev, i, j);
2826 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2827 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2828 
2829 			mask = 1;
2830 			for (k = 0; k < 16; k++) {
2831 				mask <<= k;
2832 				if (active_cu & mask) {
2833 					data &= ~mask;
2834 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2835 					break;
2836 				}
2837 			}
2838 		}
2839 	}
2840 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2841 }
2842 
/*
 * si_get_rb_disabled - bitmap of disabled render backends for the current SA
 * @rdev: radeon device
 * @max_rb_num_per_se: maximum render backends per shader engine
 * @sh_per_se: shader arrays per shader engine
 *
 * Merges the hard (CC_RB_BACKEND_DISABLE) and user
 * (GC_USER_RB_BACKEND_DISABLE) disable fields and returns the result
 * masked to the number of RBs per shader array; a set bit means the RB is
 * disabled.  Relies on the caller having routed register access via
 * si_select_se_sh().
 */
static u32 si_get_rb_disabled(struct radeon_device *rdev,
			      u32 max_rb_num_per_se,
			      u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(CC_RB_BACKEND_DISABLE);
	if (data & 1)
		/* NOTE(review): bit 0 appears to flag the disable field as
		 * valid — confirm against the register spec */
		data &= BACKEND_DISABLE_MASK;
	else
		data = 0;
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);

	return data & mask;
}
2862 
/*
 * si_setup_rb - compute the enabled render-backend mask and program the
 * rasterizer configuration
 * @rdev: radeon device
 * @se_num: number of shader engines
 * @sh_per_se: shader arrays per shader engine
 * @max_rb_num_per_se: maximum render backends per shader engine
 *
 * Gathers the per-SA RB disable bits into one global bitmap, derives the
 * complementary enabled-RB mask (cached in config.si.backend_enable_mask
 * for userspace queries), then writes a PA_SC_RASTER_CONFIG RB mapping
 * per SE based on which RBs in each pair are usable.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* pack each SA's disable bits at TAHITI_RB_BITMAP_WIDTH_PER_SH-bit stride */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* invert: a set bit now means the RB is usable */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.si.backend_enable_mask = enabled_rbs;

	/* choose an RB map per SE depending on which of each RB pair is alive;
	 * enabled_rbs is consumed two bits at a time */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2912 
/*
 * si_gpu_init - asic-wide GFX core initialisation
 * @rdev: radeon device
 *
 * Fills in the per-family shader/pipe/backend limits, derives the tiling
 * configuration from the memory-controller straps, programs the address
 * configuration registers, initialises the tiling tables, render backends
 * and SPI, counts the active CUs, and applies the remaining 3D-engine
 * defaults.  The register write ordering below is deliberate; do not
 * reorder.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* per-family hardware limits and "golden" address configuration */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		/* unknown families deliberately fall back to the Verde limits */
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* mc_shared_chmap is read but currently unused in this function */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* derive DRAM row size (in KB) from the column-count strap, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* broadcast the address configuration to every block that needs it */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);

	/* total active CU count, reported to userspace */
	rdev->config.si.active_cus = 0;
	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
			rdev->config.si.active_cus +=
				hweight32(si_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no modification: leaves hw/vbios defaults */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable all CB perf counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the configuration settle before the engine is used */
	udelay(50);
}
3176 
3177 /*
3178  * GPU scratch registers helpers function.
3179  */
3180 static void si_scratch_init(struct radeon_device *rdev)
3181 {
3182 	int i;
3183 
3184 	rdev->scratch.num_reg = 7;
3185 	rdev->scratch.reg_base = SCRATCH_REG0;
3186 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3187 		rdev->scratch.free[i] = true;
3188 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3189 	}
3190 }
3191 
/**
 * si_fence_ring_emit - emit a fence on a CP ring
 * @rdev: radeon device
 * @fence: fence to emit
 *
 * Emits a SURFACE_SYNC that flushes the TC L1/L2 and shader K$/I$ caches
 * over gart, followed by an EVENT_WRITE_EOP that writes @fence->seq to the
 * fence driver's GPU address and raises an interrupt.  The packet count
 * and order are part of the CP contract; do not reorder.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);	/* sync the full address range */
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, lower_32_bits(addr));
	/* DATA_SEL(1): write 32-bit seq; INT_SEL(2): raise interrupt on write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3218 
3219 /*
3220  * IB stuff
3221  */
/**
 * si_ring_ib_execute - schedule an indirect buffer on a CP ring
 * @rdev: radeon device
 * @ib: indirect buffer to execute
 *
 * Const IBs are preceded by a SWITCH_BUFFER packet and issued with
 * INDIRECT_BUFFER_CONST.  Normal IBs first publish the post-IB read
 * pointer (to the rptr save register or the writeback page) and are
 * followed by a gart cache flush scoped to the IB's vmid.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 (this packet) + 4 (IB packet) + 8 (flush) dwords
			 * past the current wptr — keep in sync with the
			 * packets emitted below */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* WRITE_DATA is 5 dwords, hence the different offset */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			/* NOTE(review): (1 << 8) is presumably the WRITE_DATA
			 * destination-select field — confirm vs the PM4 spec */
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	/* vmid rides in bits 31:24 alongside the IB length */
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3278 
3279 /*
3280  * CP.
3281  */
3282 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3283 {
3284 	if (enable)
3285 		WREG32(CP_ME_CNTL, 0);
3286 	else {
3287 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3288 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3289 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3290 		WREG32(SCRATCH_UMSK, 0);
3291 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3292 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3293 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3294 	}
3295 	udelay(50);
3296 }
3297 
3298 static int si_cp_load_microcode(struct radeon_device *rdev)
3299 {
3300 	const __be32 *fw_data;
3301 	int i;
3302 
3303 	if (!rdev->me_fw || !rdev->pfp_fw)
3304 		return -EINVAL;
3305 
3306 	si_cp_enable(rdev, false);
3307 
3308 	/* PFP */
3309 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3310 	WREG32(CP_PFP_UCODE_ADDR, 0);
3311 	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3312 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3313 	WREG32(CP_PFP_UCODE_ADDR, 0);
3314 
3315 	/* CE */
3316 	fw_data = (const __be32 *)rdev->ce_fw->data;
3317 	WREG32(CP_CE_UCODE_ADDR, 0);
3318 	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3319 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3320 	WREG32(CP_CE_UCODE_ADDR, 0);
3321 
3322 	/* ME */
3323 	fw_data = (const __be32 *)rdev->me_fw->data;
3324 	WREG32(CP_ME_RAM_WADDR, 0);
3325 	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3326 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3327 	WREG32(CP_ME_RAM_WADDR, 0);
3328 
3329 	WREG32(CP_PFP_UCODE_ADDR, 0);
3330 	WREG32(CP_CE_UCODE_ADDR, 0);
3331 	WREG32(CP_ME_RAM_WADDR, 0);
3332 	WREG32(CP_ME_RAM_RADDR, 0);
3333 	return 0;
3334 }
3335 
3336 static int si_cp_start(struct radeon_device *rdev)
3337 {
3338 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3339 	int r, i;
3340 
3341 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3342 	if (r) {
3343 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3344 		return r;
3345 	}
3346 	/* init the CP */
3347 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3348 	radeon_ring_write(ring, 0x1);
3349 	radeon_ring_write(ring, 0x0);
3350 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3351 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3352 	radeon_ring_write(ring, 0);
3353 	radeon_ring_write(ring, 0);
3354 
3355 	/* init the CE partitions */
3356 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3357 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3358 	radeon_ring_write(ring, 0xc000);
3359 	radeon_ring_write(ring, 0xe000);
3360 	radeon_ring_unlock_commit(rdev, ring, false);
3361 
3362 	si_cp_enable(rdev, true);
3363 
3364 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3365 	if (r) {
3366 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3367 		return r;
3368 	}
3369 
3370 	/* setup clear context state */
3371 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3372 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3373 
3374 	for (i = 0; i < si_default_size; i++)
3375 		radeon_ring_write(ring, si_default_state[i]);
3376 
3377 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3378 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3379 
3380 	/* set clear context state */
3381 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3382 	radeon_ring_write(ring, 0);
3383 
3384 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3385 	radeon_ring_write(ring, 0x00000316);
3386 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3387 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3388 
3389 	radeon_ring_unlock_commit(rdev, ring, false);
3390 
3391 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3392 		ring = &rdev->ring[i];
3393 		r = radeon_ring_lock(rdev, ring, 2);
3394 
3395 		/* clear the compute context state */
3396 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3397 		radeon_ring_write(ring, 0);
3398 
3399 		radeon_ring_unlock_commit(rdev, ring, false);
3400 	}
3401 
3402 	return 0;
3403 }
3404 
3405 static void si_cp_fini(struct radeon_device *rdev)
3406 {
3407 	struct radeon_ring *ring;
3408 	si_cp_enable(rdev, false);
3409 
3410 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3411 	radeon_ring_fini(rdev, ring);
3412 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3413 
3414 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3415 	radeon_ring_fini(rdev, ring);
3416 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3417 
3418 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3419 	radeon_ring_fini(rdev, ring);
3420 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3421 }
3422 
/*
 * si_cp_resume - bring up the three CP ring buffers and start the CP
 * @rdev: radeon device
 *
 * Programs ring buffer size, read/write pointers, writeback addresses and
 * base for the gfx ring (RB0) and both compute rings (RB1/RB2), starts
 * the CP via si_cp_start(), and ring-tests all three rings.  The gfx ring
 * test failing is fatal; a compute ring test failing only marks that ring
 * not ready.
 *
 * Returns 0 on success or the gfx ring test error.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	/* CNTL encodes log2 sizes: block size in bits 15:8, buffer size in 7:0 */
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);	/* drop RB_RPTR_WR_ENA again */

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring failure is fatal: nothing can run */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	/* compute ring failures are non-fatal: just mark the ring unusable */
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	/* restore the full VRAM window now that the copy ring is usable again */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
3548 
/**
 * si_gpu_check_soft_reset - check which blocks are busy (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Read the various GPU status registers and translate any busy/pending
 * bits into a mask of RADEON_RESET_* flags describing which engine
 * blocks would need to be soft reset.  Returns 0 when everything is
 * idle.  A busy MC is deliberately filtered out at the end since it is
 * normally just busy, not hung.
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM event engine busy implicates GRBM, gfx and CP */
	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's most likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3629 
/**
 * si_gpu_soft_reset - soft reset the requested GPU blocks (SI)
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of RADEON_RESET_* flags (from si_gpu_check_soft_reset())
 *
 * Quiesce the engines (CP, RLC, DMA) and the memory controller, then
 * pulse the GRBM/SRBM soft reset bits corresponding to @reset_mask and
 * restore MC access.  Register order matters throughout; the sequence
 * follows the hardware bring-down/bring-up ordering.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* stop the DMA ring buffers for any engine we are about to reset */
	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* assert, wait, then deassert the reset bits; the read-backs
	 * after each write presumably post the write -- standard pattern
	 * on this hardware */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3761 
/**
 * si_set_clk_bypass_mode - switch SCLK/MCLK to their bypass sources
 *
 * @rdev: radeon_device pointer
 *
 * Route the system clock (and memory clock) to bypass mode so the chip
 * keeps running on a stable reference clock while the PLLs are powered
 * down prior to a PCI config reset.
 */
static void si_set_clk_bypass_mode(struct radeon_device *rdev)
{
	u32 tmp, i;

	/* enable SPLL bypass */
	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_BYPASS_EN;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	/* request the clock change */
	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp |= SPLL_CTLREQ_CHG;
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	/* wait (bounded) for the hardware to acknowledge the switch */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
			break;
		udelay(1);
	}

	/* clear the request and mux-update bits */
	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	/* take MCLK off the MPLL as well */
	tmp = RREG32(MPLL_CNTL_MODE);
	tmp &= ~MPLL_MCLK_SEL;
	WREG32(MPLL_CNTL_MODE, tmp);
}
3788 
/**
 * si_spll_powerdown - power down the system PLL
 *
 * @rdev: radeon_device pointer
 *
 * Take software control of the SPLL, put it into reset and sleep, then
 * hand control back to the hardware.  Called after the clocks have been
 * switched to bypass mode (si_set_clk_bypass_mode()).
 */
static void si_spll_powerdown(struct radeon_device *rdev)
{
	u32 tmp;

	/* take SW control of the PLL */
	tmp = RREG32(SPLL_CNTL_MODE);
	tmp |= SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);

	/* hold the PLL in reset */
	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_RESET;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	/* and put it to sleep */
	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_SLEEP;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	/* release SW control */
	tmp = RREG32(SPLL_CNTL_MODE);
	tmp &= ~SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);
}
3809 
/**
 * si_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Full-asic reset, used as a fallback when soft reset fails.  Quiesce
 * all engines and the MC, move the clocks to bypass mode, power down
 * the SPLL, disable bus mastering and trigger the PCI config reset,
 * then wait (bounded) for the asic to come back.
 */
static void si_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
	/* dma0 */
	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	/* dma1 */
	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	si_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* set mclk/sclk to bypass */
	si_set_clk_bypass_mode(rdev);
	/* powerdown spll */
	si_spll_powerdown(rdev);
	/* disable BM (DragonFly API; Linux uses pci_clear_master()) */
	pci_disable_busmaster(rdev->pdev->dev);
	/* reset */
	radeon_pci_config_reset(rdev);
	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads as
	 * all-ones while the asic is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}
}
3861 
3862 int si_asic_reset(struct radeon_device *rdev)
3863 {
3864 	u32 reset_mask;
3865 
3866 	reset_mask = si_gpu_check_soft_reset(rdev);
3867 
3868 	if (reset_mask)
3869 		r600_set_bios_scratch_engine_hung(rdev, true);
3870 
3871 	/* try soft reset */
3872 	si_gpu_soft_reset(rdev, reset_mask);
3873 
3874 	reset_mask = si_gpu_check_soft_reset(rdev);
3875 
3876 	/* try pci config reset */
3877 	if (reset_mask && radeon_hard_reset)
3878 		si_gpu_pci_config_reset(rdev);
3879 
3880 	reset_mask = si_gpu_check_soft_reset(rdev);
3881 
3882 	if (!reset_mask)
3883 		r600_set_bios_scratch_engine_hung(rdev, false);
3884 
3885 	return 0;
3886 }
3887 
3888 /**
3889  * si_gfx_is_lockup - Check if the GFX engine is locked up
3890  *
3891  * @rdev: radeon_device pointer
3892  * @ring: radeon_ring structure holding ring information
3893  *
3894  * Check if the GFX engine is locked up.
3895  * Returns true if the engine appears to be locked up, false if not.
3896  */
3897 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3898 {
3899 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3900 
3901 	if (!(reset_mask & (RADEON_RESET_GFX |
3902 			    RADEON_RESET_COMPUTE |
3903 			    RADEON_RESET_CP))) {
3904 		radeon_ring_lockup_update(rdev, ring);
3905 		return false;
3906 	}
3907 	return radeon_ring_test_lockup(rdev, ring);
3908 }
3909 
3910 /* MC */
/**
 * si_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of VRAM in the GPU's address space (MC_VM_FB_LOCATION
 * and the system aperture), point faulting accesses at the scratch page,
 * and disable AGP.  MC access is stopped around the update and display
 * (when present) is fenced off from VRAM afterwards.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	/* faulting accesses land on the scratch page */
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs end (hi 16 bits) and start (lo 16 bits) in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable AGP: base == bot == top */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3961 
/**
 * si_vram_gtt_location - place VRAM and GTT in the GPU address space
 *
 * @rdev: radeon_device pointer
 * @mc: memory controller structure to lay out
 *
 * Clamp VRAM so at least 1024M of address space remains for GTT, then
 * locate VRAM at offset 0 and GTT after it.
 */
void si_vram_gtt_location(struct radeon_device *rdev,
			  struct radeon_mc *mc)
{
	if (mc->mc_vram_size > 0xFFC0000000ULL) {
		/* leave room for at least 1024M GTT */
		dev_warn(rdev->dev, "limiting VRAM\n");
		mc->real_vram_size = 0xFFC0000000ULL;
		mc->mc_vram_size = 0xFFC0000000ULL;
	}
	/* NOTE(review): uses &rdev->mc here but @mc below; the only visible
	 * caller passes &rdev->mc so both refer to the same object --
	 * confirm before calling with any other mc */
	radeon_vram_location(rdev, &rdev->mc, 0);
	rdev->mc.gtt_base_align = 0;
	radeon_gtt_location(rdev, mc);
}
3975 
/**
 * si_mc_init - read VRAM configuration
 *
 * @rdev: radeon_device pointer
 *
 * Determine memory bus width (channel size x channel count) and VRAM
 * size from the MC config registers, record the PCI aperture, and lay
 * out VRAM/GTT.  Always returns 0.
 */
static int si_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM informations */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_OVERRIDE) {
		chansize = 16;
	} else if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	/* map the NOOFCHAN field to a channel count; the non-power-of-two
	 * entries (3, 6, 10, 12) are real SI configurations */
	tmp = RREG32(MC_SHARED_CHMAP);
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	case 4:
		numchan = 3;
		break;
	case 5:
		numchan = 6;
		break;
	case 6:
		numchan = 10;
		break;
	case 7:
		numchan = 12;
		break;
	case 8:
		numchan = 16;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0 ? */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* size in MB on si */
	tmp = RREG32(CONFIG_MEMSIZE);
	/* some boards may have garbage in the upper 16 bits */
	if (tmp & 0xffff0000) {
		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
		/* keep the low half only if it is non-zero */
		if (tmp & 0xffff)
			tmp &= 0xffff;
	}
	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);

	return 0;
}
4042 
4043 /*
4044  * GART
4045  */
/**
 * si_pcie_gart_tlb_flush - flush the GART TLBs
 *
 * @rdev: radeon_device pointer
 *
 * Flush the HDP cache, then invalidate the VM context 0 TLB so stale
 * page-table entries are dropped after GART updates.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
4054 
/**
 * si_pcie_gart_enable - set up and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pin the GART page table in VRAM, configure the L1 TLB and L2 cache,
 * program VM context 0 to cover the GTT aperture, restore the saved
 * page-table base addresses for contexts 1-15, and enable fault
 * reporting on those contexts.  Returns 0 on success, negative error
 * code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* unmapped accesses fault to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* XXX: raw offsets without symbolic names -- cleared during GART
	 * bring-up; meaning not documented here */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4142 
/**
 * si_pcie_gart_disable - disable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Save the per-context page-table base addresses (restored again in
 * si_pcie_gart_enable()), disable all VM contexts and the L1/L2 caches,
 * and unpin the GART table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save context1-15 page table bases so they survive a re-enable */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4172 
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disable the GART, free its table in VRAM and release the GART state.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4179 
4180 /* vm parser */
4181 static bool si_vm_reg_valid(u32 reg)
4182 {
4183 	/* context regs are fine */
4184 	if (reg >= 0x28000)
4185 		return true;
4186 
4187 	/* check config regs */
4188 	switch (reg) {
4189 	case GRBM_GFX_INDEX:
4190 	case CP_STRMOUT_CNTL:
4191 	case VGT_VTX_VECT_EJECT_REG:
4192 	case VGT_CACHE_INVALIDATION:
4193 	case VGT_ESGS_RING_SIZE:
4194 	case VGT_GSVS_RING_SIZE:
4195 	case VGT_GS_VERTEX_REUSE:
4196 	case VGT_PRIMITIVE_TYPE:
4197 	case VGT_INDEX_TYPE:
4198 	case VGT_NUM_INDICES:
4199 	case VGT_NUM_INSTANCES:
4200 	case VGT_TF_RING_SIZE:
4201 	case VGT_HS_OFFCHIP_PARAM:
4202 	case VGT_TF_MEMORY_BASE:
4203 	case PA_CL_ENHANCE:
4204 	case PA_SU_LINE_STIPPLE_VALUE:
4205 	case PA_SC_LINE_STIPPLE_STATE:
4206 	case PA_SC_ENHANCE:
4207 	case SQC_CACHES:
4208 	case SPI_STATIC_THREAD_MGMT_1:
4209 	case SPI_STATIC_THREAD_MGMT_2:
4210 	case SPI_STATIC_THREAD_MGMT_3:
4211 	case SPI_PS_MAX_WAVE_ID:
4212 	case SPI_CONFIG_CNTL:
4213 	case SPI_CONFIG_CNTL_1:
4214 	case TA_CNTL_AUX:
4215 		return true;
4216 	default:
4217 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4218 		return false;
4219 	}
4220 }
4221 
4222 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4223 				  u32 *ib, struct radeon_cs_packet *pkt)
4224 {
4225 	switch (pkt->opcode) {
4226 	case PACKET3_NOP:
4227 	case PACKET3_SET_BASE:
4228 	case PACKET3_SET_CE_DE_COUNTERS:
4229 	case PACKET3_LOAD_CONST_RAM:
4230 	case PACKET3_WRITE_CONST_RAM:
4231 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4232 	case PACKET3_DUMP_CONST_RAM:
4233 	case PACKET3_INCREMENT_CE_COUNTER:
4234 	case PACKET3_WAIT_ON_DE_COUNTER:
4235 	case PACKET3_CE_WRITE:
4236 		break;
4237 	default:
4238 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4239 		return -EINVAL;
4240 	}
4241 	return 0;
4242 }
4243 
4244 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4245 {
4246 	u32 start_reg, reg, i;
4247 	u32 command = ib[idx + 4];
4248 	u32 info = ib[idx + 1];
4249 	u32 idx_value = ib[idx];
4250 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4251 		/* src address space is register */
4252 		if (((info & 0x60000000) >> 29) == 0) {
4253 			start_reg = idx_value << 2;
4254 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4255 				reg = start_reg;
4256 				if (!si_vm_reg_valid(reg)) {
4257 					DRM_ERROR("CP DMA Bad SRC register\n");
4258 					return -EINVAL;
4259 				}
4260 			} else {
4261 				for (i = 0; i < (command & 0x1fffff); i++) {
4262 					reg = start_reg + (4 * i);
4263 					if (!si_vm_reg_valid(reg)) {
4264 						DRM_ERROR("CP DMA Bad SRC register\n");
4265 						return -EINVAL;
4266 					}
4267 				}
4268 			}
4269 		}
4270 	}
4271 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4272 		/* dst address space is register */
4273 		if (((info & 0x00300000) >> 20) == 0) {
4274 			start_reg = ib[idx + 2];
4275 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4276 				reg = start_reg;
4277 				if (!si_vm_reg_valid(reg)) {
4278 					DRM_ERROR("CP DMA Bad DST register\n");
4279 					return -EINVAL;
4280 				}
4281 			} else {
4282 				for (i = 0; i < (command & 0x1fffff); i++) {
4283 					reg = start_reg + (4 * i);
4284 				if (!si_vm_reg_valid(reg)) {
4285 						DRM_ERROR("CP DMA Bad DST register\n");
4286 						return -EINVAL;
4287 					}
4288 				}
4289 			}
4290 		}
4291 	}
4292 	return 0;
4293 }
4294 
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 on the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer dwords
 * @pkt: decoded packet header
 *
 * Whitelist check for PACKET3 opcodes submitted to the gfx ring from a
 * VM context.  Packets that can write registers have each register
 * offset validated with si_vm_reg_valid().  Returns 0 if the packet is
 * allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* these opcodes cannot touch registers and are always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst sel 0 == register destination; validate it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst sel 0 == register destination; validate the range */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* no-increment mode: single register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-to-register mode */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* register destination */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* range-check against the config reg window, then
		 * whitelist each register in the run */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4412 
/**
 * si_vm_packet3_compute_check - validate a PACKET3 on a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer dwords
 * @pkt: decoded packet header
 *
 * Same whitelist scheme as si_vm_packet3_gfx_check() but restricted to
 * the opcodes legal on the compute rings (CP1/CP2).  Returns 0 if the
 * packet is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* these opcodes cannot touch registers and are always allowed */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dst sel 0 == register destination; validate it */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		/* dst sel 0 == register destination; validate the range */
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* no-increment mode: single register */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		/* write-to-register mode */
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* register destination */
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4500 
/**
 * si_ib_parse - validate an indirect buffer from a VM client
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to validate
 *
 * Walk the IB packet by packet: type-0 packets are rejected outright
 * (with a dump of the IB), type-2 packets are skipped, and type-3
 * packets are dispatched to the per-ring checker (CE, gfx or compute).
 * Returns 0 if the whole IB is acceptable, negative error code on the
 * first rejected packet.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0, i;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			/* dump the whole IB, marking the offending dword */
			for (i = 0; i < ib->length_dw; i++) {
				if (i == idx)
					printk("\t0x%08x <---\n", ib->ptr[i]);
				else
					printk("\t0x%08x\n", ib->ptr[i]);
			}
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 is a 1-dword filler packet */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			/* const IBs run on the CE and get the CE whitelist */
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* header dword + count+1 payload dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
4558 
4559 /*
4560  * vm
4561  */
/**
 * si_vm_init - initialize the VM manager parameters (SI)
 *
 * @rdev: radeon_device pointer
 *
 * SI exposes 16 VM contexts and places VRAM pages at offset 0.
 * Always returns 0.
 */
int si_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	rdev->vm_manager.vram_base_offset = 0;

	return 0;
}
4571 
/**
 * si_vm_fini - tear down the VM manager (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Nothing to undo on SI; present to satisfy the asic callback interface.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4575 
4576 /**
4577  * si_vm_decode_fault - print human readable fault info
4578  *
4579  * @rdev: radeon_device pointer
4580  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4581  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4582  *
4583  * Print human readable fault information (SI).
4584  */
4585 static void si_vm_decode_fault(struct radeon_device *rdev,
4586 			       u32 status, u32 addr)
4587 {
4588 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4589 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4590 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4591 	char *block;
4592 
4593 	if (rdev->family == CHIP_TAHITI) {
4594 		switch (mc_id) {
4595 		case 160:
4596 		case 144:
4597 		case 96:
4598 		case 80:
4599 		case 224:
4600 		case 208:
4601 		case 32:
4602 		case 16:
4603 			block = "CB";
4604 			break;
4605 		case 161:
4606 		case 145:
4607 		case 97:
4608 		case 81:
4609 		case 225:
4610 		case 209:
4611 		case 33:
4612 		case 17:
4613 			block = "CB_FMASK";
4614 			break;
4615 		case 162:
4616 		case 146:
4617 		case 98:
4618 		case 82:
4619 		case 226:
4620 		case 210:
4621 		case 34:
4622 		case 18:
4623 			block = "CB_CMASK";
4624 			break;
4625 		case 163:
4626 		case 147:
4627 		case 99:
4628 		case 83:
4629 		case 227:
4630 		case 211:
4631 		case 35:
4632 		case 19:
4633 			block = "CB_IMMED";
4634 			break;
4635 		case 164:
4636 		case 148:
4637 		case 100:
4638 		case 84:
4639 		case 228:
4640 		case 212:
4641 		case 36:
4642 		case 20:
4643 			block = "DB";
4644 			break;
4645 		case 165:
4646 		case 149:
4647 		case 101:
4648 		case 85:
4649 		case 229:
4650 		case 213:
4651 		case 37:
4652 		case 21:
4653 			block = "DB_HTILE";
4654 			break;
4655 		case 167:
4656 		case 151:
4657 		case 103:
4658 		case 87:
4659 		case 231:
4660 		case 215:
4661 		case 39:
4662 		case 23:
4663 			block = "DB_STEN";
4664 			break;
4665 		case 72:
4666 		case 68:
4667 		case 64:
4668 		case 8:
4669 		case 4:
4670 		case 0:
4671 		case 136:
4672 		case 132:
4673 		case 128:
4674 		case 200:
4675 		case 196:
4676 		case 192:
4677 			block = "TC";
4678 			break;
4679 		case 112:
4680 		case 48:
4681 			block = "CP";
4682 			break;
4683 		case 49:
4684 		case 177:
4685 		case 50:
4686 		case 178:
4687 			block = "SH";
4688 			break;
4689 		case 53:
4690 		case 190:
4691 			block = "VGT";
4692 			break;
4693 		case 117:
4694 			block = "IH";
4695 			break;
4696 		case 51:
4697 		case 115:
4698 			block = "RLC";
4699 			break;
4700 		case 119:
4701 		case 183:
4702 			block = "DMA0";
4703 			break;
4704 		case 61:
4705 			block = "DMA1";
4706 			break;
4707 		case 248:
4708 		case 120:
4709 			block = "HDP";
4710 			break;
4711 		default:
4712 			block = "unknown";
4713 			break;
4714 		}
4715 	} else {
4716 		switch (mc_id) {
4717 		case 32:
4718 		case 16:
4719 		case 96:
4720 		case 80:
4721 		case 160:
4722 		case 144:
4723 		case 224:
4724 		case 208:
4725 			block = "CB";
4726 			break;
4727 		case 33:
4728 		case 17:
4729 		case 97:
4730 		case 81:
4731 		case 161:
4732 		case 145:
4733 		case 225:
4734 		case 209:
4735 			block = "CB_FMASK";
4736 			break;
4737 		case 34:
4738 		case 18:
4739 		case 98:
4740 		case 82:
4741 		case 162:
4742 		case 146:
4743 		case 226:
4744 		case 210:
4745 			block = "CB_CMASK";
4746 			break;
4747 		case 35:
4748 		case 19:
4749 		case 99:
4750 		case 83:
4751 		case 163:
4752 		case 147:
4753 		case 227:
4754 		case 211:
4755 			block = "CB_IMMED";
4756 			break;
4757 		case 36:
4758 		case 20:
4759 		case 100:
4760 		case 84:
4761 		case 164:
4762 		case 148:
4763 		case 228:
4764 		case 212:
4765 			block = "DB";
4766 			break;
4767 		case 37:
4768 		case 21:
4769 		case 101:
4770 		case 85:
4771 		case 165:
4772 		case 149:
4773 		case 229:
4774 		case 213:
4775 			block = "DB_HTILE";
4776 			break;
4777 		case 39:
4778 		case 23:
4779 		case 103:
4780 		case 87:
4781 		case 167:
4782 		case 151:
4783 		case 231:
4784 		case 215:
4785 			block = "DB_STEN";
4786 			break;
4787 		case 72:
4788 		case 68:
4789 		case 8:
4790 		case 4:
4791 		case 136:
4792 		case 132:
4793 		case 200:
4794 		case 196:
4795 			block = "TC";
4796 			break;
4797 		case 112:
4798 		case 48:
4799 			block = "CP";
4800 			break;
4801 		case 49:
4802 		case 177:
4803 		case 50:
4804 		case 178:
4805 			block = "SH";
4806 			break;
4807 		case 53:
4808 			block = "VGT";
4809 			break;
4810 		case 117:
4811 			block = "IH";
4812 			break;
4813 		case 51:
4814 		case 115:
4815 			block = "RLC";
4816 			break;
4817 		case 119:
4818 		case 183:
4819 			block = "DMA0";
4820 			break;
4821 		case 61:
4822 			block = "DMA1";
4823 			break;
4824 		case 248:
4825 		case 120:
4826 			block = "HDP";
4827 			break;
4828 		default:
4829 			block = "unknown";
4830 			break;
4831 		}
4832 	}
4833 
4834 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4835 	       protections, vmid, addr,
4836 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4837 	       block, mc_id);
4838 }
4839 
/**
 * si_vm_flush - emit a VM TLB flush on a PM4 ring (SI)
 *
 * @rdev: radeon_device pointer
 * @ridx: index of the ring to emit on
 * @vm:   VM to flush; no-op when NULL
 *
 * Emits WRITE_DATA packets that (1) update the VM's page directory base
 * register, (2) flush the HDP cache and (3) invalidate the VM's TLB
 * context, then syncs PFP to ME.  Packet order must not change.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));

	/* VM contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4882 
4883 /*
4884  *  Power and clock gating
4885  */
4886 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4887 {
4888 	int i;
4889 
4890 	for (i = 0; i < rdev->usec_timeout; i++) {
4891 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4892 			break;
4893 		udelay(1);
4894 	}
4895 
4896 	for (i = 0; i < rdev->usec_timeout; i++) {
4897 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4898 			break;
4899 		udelay(1);
4900 	}
4901 }
4902 
4903 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4904 					 bool enable)
4905 {
4906 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
4907 	u32 mask;
4908 	int i;
4909 
4910 	if (enable)
4911 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4912 	else
4913 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4914 	WREG32(CP_INT_CNTL_RING0, tmp);
4915 
4916 	if (!enable) {
4917 		/* read a gfx register */
4918 		tmp = RREG32(DB_DEPTH_INFO);
4919 
4920 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4921 		for (i = 0; i < rdev->usec_timeout; i++) {
4922 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4923 				break;
4924 			udelay(1);
4925 		}
4926 	}
4927 }
4928 
4929 static void si_set_uvd_dcm(struct radeon_device *rdev,
4930 			   bool sw_mode)
4931 {
4932 	u32 tmp, tmp2;
4933 
4934 	tmp = RREG32(UVD_CGC_CTRL);
4935 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4936 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
4937 
4938 	if (sw_mode) {
4939 		tmp &= ~0x7ffff800;
4940 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4941 	} else {
4942 		tmp |= 0x7ffff800;
4943 		tmp2 = 0;
4944 	}
4945 
4946 	WREG32(UVD_CGC_CTRL, tmp);
4947 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4948 }
4949 
4950 void si_init_uvd_internal_cg(struct radeon_device *rdev)
4951 {
4952 	bool hw_mode = true;
4953 
4954 	if (hw_mode) {
4955 		si_set_uvd_dcm(rdev, false);
4956 	} else {
4957 		u32 tmp = RREG32(UVD_CGC_CTRL);
4958 		tmp &= ~DCM;
4959 		WREG32(UVD_CGC_CTRL, tmp);
4960 	}
4961 }
4962 
4963 static u32 si_halt_rlc(struct radeon_device *rdev)
4964 {
4965 	u32 data, orig;
4966 
4967 	orig = data = RREG32(RLC_CNTL);
4968 
4969 	if (data & RLC_ENABLE) {
4970 		data &= ~RLC_ENABLE;
4971 		WREG32(RLC_CNTL, data);
4972 
4973 		si_wait_for_rlc_serdes(rdev);
4974 	}
4975 
4976 	return orig;
4977 }
4978 
4979 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4980 {
4981 	u32 tmp;
4982 
4983 	tmp = RREG32(RLC_CNTL);
4984 	if (tmp != rlc)
4985 		WREG32(RLC_CNTL, rlc);
4986 }
4987 
4988 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4989 {
4990 	u32 data, orig;
4991 
4992 	orig = data = RREG32(DMA_PG);
4993 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4994 		data |= PG_CNTL_ENABLE;
4995 	else
4996 		data &= ~PG_CNTL_ENABLE;
4997 	if (orig != data)
4998 		WREG32(DMA_PG, data);
4999 }
5000 
5001 static void si_init_dma_pg(struct radeon_device *rdev)
5002 {
5003 	u32 tmp;
5004 
5005 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5006 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5007 
5008 	for (tmp = 0; tmp < 5; tmp++)
5009 		WREG32(DMA_PGFSM_WRITE, 0);
5010 }
5011 
/* Enable/disable gfx coarse-grain power gating (only armed when the
 * GFX_PG pg flag is set). */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		/* power-up/down delay thresholds */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* dummy gfx register read; presumably forces the gfx block
		 * out of its gated state — NOTE(review): confirm intent */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
5036 
5037 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5038 {
5039 	u32 tmp;
5040 
5041 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5042 
5043 	tmp = RREG32(RLC_PG_CNTL);
5044 	tmp |= GFX_PG_SRC;
5045 	WREG32(RLC_PG_CNTL, tmp);
5046 
5047 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5048 
5049 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5050 
5051 	tmp &= ~GRBM_REG_SGIT_MASK;
5052 	tmp |= GRBM_REG_SGIT(0x700);
5053 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5054 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5055 }
5056 
5057 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5058 {
5059 	u32 mask = 0, tmp, tmp1;
5060 	int i;
5061 
5062 	si_select_se_sh(rdev, se, sh);
5063 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5064 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5065 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5066 
5067 	tmp &= 0xffff0000;
5068 
5069 	tmp |= tmp1;
5070 	tmp >>= 16;
5071 
5072 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5073 		mask <<= 1;
5074 		mask |= 1;
5075 	}
5076 
5077 	return (~tmp) & mask;
5078 }
5079 
5080 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5081 {
5082 	u32 i, j, k, active_cu_number = 0;
5083 	u32 mask, counter, cu_bitmap;
5084 	u32 tmp = 0;
5085 
5086 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5087 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5088 			mask = 1;
5089 			cu_bitmap = 0;
5090 			counter  = 0;
5091 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5092 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5093 					if (counter < 2)
5094 						cu_bitmap |= mask;
5095 					counter++;
5096 				}
5097 				mask <<= 1;
5098 			}
5099 
5100 			active_cu_number += counter;
5101 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5102 		}
5103 	}
5104 
5105 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5106 
5107 	tmp = RREG32(RLC_MAX_PG_CU);
5108 	tmp &= ~MAX_PU_CU_MASK;
5109 	tmp |= MAX_PU_CU(active_cu_number);
5110 	WREG32(RLC_MAX_PG_CU, tmp);
5111 }
5112 
/* Enable/disable coarse-grain clock gating (CGCG/CGLS) for the gfx
 * block.  The serdes reprogramming must happen with the RLC halted;
 * the write sequence below comes from AMD and must not be reordered. */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* halt RLC, broadcast the serdes write, then restore RLC */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* four dummy CB clock-gating reads; presumably lets the
		 * clocks settle before disabling — TODO confirm */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	/* avoid a redundant register write when nothing changed */
	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5152 
/* Enable/disable medium-grain clock gating for the gfx block.  Both
 * paths end with an RLC halt + serdes broadcast write; the magic
 * control values come from AMD and must not be changed. */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* optionally enable CP memory light sleep */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the MGCG override bits (low 6 bits) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* set the override bits to force gating off */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5208 
5209 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5210 			       bool enable)
5211 {
5212 	u32 orig, data, tmp;
5213 
5214 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5215 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5216 		tmp |= 0x3fff;
5217 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5218 
5219 		orig = data = RREG32(UVD_CGC_CTRL);
5220 		data |= DCM;
5221 		if (orig != data)
5222 			WREG32(UVD_CGC_CTRL, data);
5223 
5224 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5225 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5226 	} else {
5227 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5228 		tmp &= ~0x3fff;
5229 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5230 
5231 		orig = data = RREG32(UVD_CGC_CTRL);
5232 		data &= ~DCM;
5233 		if (orig != data)
5234 			WREG32(UVD_CGC_CTRL, data);
5235 
5236 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5237 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5238 	}
5239 }
5240 
/* Memory-controller / VM clock-gating control registers, toggled as a
 * group by si_enable_mc_ls() and si_enable_mc_mgcg(). */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5253 
5254 static void si_enable_mc_ls(struct radeon_device *rdev,
5255 			    bool enable)
5256 {
5257 	int i;
5258 	u32 orig, data;
5259 
5260 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5261 		orig = data = RREG32(mc_cg_registers[i]);
5262 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5263 			data |= MC_LS_ENABLE;
5264 		else
5265 			data &= ~MC_LS_ENABLE;
5266 		if (data != orig)
5267 			WREG32(mc_cg_registers[i], data);
5268 	}
5269 }
5270 
5271 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5272 			       bool enable)
5273 {
5274 	int i;
5275 	u32 orig, data;
5276 
5277 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5278 		orig = data = RREG32(mc_cg_registers[i]);
5279 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5280 			data |= MC_CG_ENABLE;
5281 		else
5282 			data &= ~MC_CG_ENABLE;
5283 		if (data != orig)
5284 			WREG32(mc_cg_registers[i], data);
5285 	}
5286 }
5287 
5288 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5289 			       bool enable)
5290 {
5291 	u32 orig, data, offset;
5292 	int i;
5293 
5294 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5295 		for (i = 0; i < 2; i++) {
5296 			if (i == 0)
5297 				offset = DMA0_REGISTER_OFFSET;
5298 			else
5299 				offset = DMA1_REGISTER_OFFSET;
5300 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5301 			data &= ~MEM_POWER_OVERRIDE;
5302 			if (data != orig)
5303 				WREG32(DMA_POWER_CNTL + offset, data);
5304 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5305 		}
5306 	} else {
5307 		for (i = 0; i < 2; i++) {
5308 			if (i == 0)
5309 				offset = DMA0_REGISTER_OFFSET;
5310 			else
5311 				offset = DMA1_REGISTER_OFFSET;
5312 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5313 			data |= MEM_POWER_OVERRIDE;
5314 			if (data != orig)
5315 				WREG32(DMA_POWER_CNTL + offset, data);
5316 
5317 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5318 			data = 0xff000000;
5319 			if (data != orig)
5320 				WREG32(DMA_CLK_CTRL + offset, data);
5321 		}
5322 	}
5323 }
5324 
5325 static void si_enable_bif_mgls(struct radeon_device *rdev,
5326 			       bool enable)
5327 {
5328 	u32 orig, data;
5329 
5330 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5331 
5332 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5333 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5334 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5335 	else
5336 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5337 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5338 
5339 	if (orig != data)
5340 		WREG32_PCIE(PCIE_CNTL2, data);
5341 }
5342 
5343 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5344 			       bool enable)
5345 {
5346 	u32 orig, data;
5347 
5348 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5349 
5350 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5351 		data &= ~CLOCK_GATING_DIS;
5352 	else
5353 		data |= CLOCK_GATING_DIS;
5354 
5355 	if (orig != data)
5356 		WREG32(HDP_HOST_PATH_CNTL, data);
5357 }
5358 
5359 static void si_enable_hdp_ls(struct radeon_device *rdev,
5360 			     bool enable)
5361 {
5362 	u32 orig, data;
5363 
5364 	orig = data = RREG32(HDP_MEM_POWER_LS);
5365 
5366 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5367 		data |= HDP_LS_ENABLE;
5368 	else
5369 		data &= ~HDP_LS_ENABLE;
5370 
5371 	if (orig != data)
5372 		WREG32(HDP_MEM_POWER_LS, data);
5373 }
5374 
/* Enable/disable clock gating on the blocks selected in @block.
 * For gfx, MGCG must be enabled before CGCG (and CGCG disabled before
 * MGCG); GUI idle interrupts are masked across the transition. */
static void si_update_cg(struct radeon_device *rdev,
			 u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		si_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			si_enable_mgcg(rdev, true);
			si_enable_cgcg(rdev, true);
		} else {
			si_enable_cgcg(rdev, false);
			si_enable_mgcg(rdev, false);
		}
		si_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		si_enable_mc_mgcg(rdev, enable);
		si_enable_mc_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		si_enable_dma_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		si_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd) {
			si_enable_uvd_mgcg(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		si_enable_hdp_mgcg(rdev, enable);
		si_enable_hdp_ls(rdev, enable);
	}
}
5415 
/* Enable clock gating on all supported blocks at init; UVD is handled
 * last and also gets its internal clock-gating setup. */
static void si_init_cg(struct radeon_device *rdev)
{
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), true);
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
		si_init_uvd_internal_cg(rdev);
	}
}
5428 
/* Disable clock gating on teardown; UVD first (reverse of init order). */
static void si_fini_cg(struct radeon_device *rdev)
{
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
	}
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), false);
}
5440 
5441 u32 si_get_csb_size(struct radeon_device *rdev)
5442 {
5443 	u32 count = 0;
5444 	const struct cs_section_def *sect = NULL;
5445 	const struct cs_extent_def *ext = NULL;
5446 
5447 	if (rdev->rlc.cs_data == NULL)
5448 		return 0;
5449 
5450 	/* begin clear state */
5451 	count += 2;
5452 	/* context control state */
5453 	count += 3;
5454 
5455 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5456 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5457 			if (sect->id == SECT_CONTEXT)
5458 				count += 2 + ext->reg_count;
5459 			else
5460 				return 0;
5461 		}
5462 	}
5463 	/* pa_sc_raster_config */
5464 	count += 3;
5465 	/* end clear state */
5466 	count += 2;
5467 	/* clear state */
5468 	count += 2;
5469 
5470 	return count;
5471 }
5472 
/* si_get_csb_buffer - fill @buffer with the PM4 clear-state stream.
 * Layout (and hence emission order) must match si_get_csb_size().
 * @buffer must hold at least si_get_csb_size() dwords. */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* emit every context-register extent as a SET_CONTEXT_REG packet */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-asic raster configuration */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = cpu_to_le32(0x2a00126a);
		break;
	case CHIP_VERDE:
		buffer[count++] = cpu_to_le32(0x0000124a);
		break;
	case CHIP_OLAND:
		buffer[count++] = cpu_to_le32(0x00000082);
		break;
	case CHIP_HAINAN:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
5532 
5533 static void si_init_pg(struct radeon_device *rdev)
5534 {
5535 	if (rdev->pg_flags) {
5536 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5537 			si_init_dma_pg(rdev);
5538 		}
5539 		si_init_ao_cu_mask(rdev);
5540 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5541 			si_init_gfx_cgpg(rdev);
5542 		} else {
5543 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5544 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5545 		}
5546 		si_enable_dma_pg(rdev, true);
5547 		si_enable_gfx_cgpg(rdev, true);
5548 	} else {
5549 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5550 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5551 	}
5552 }
5553 
5554 static void si_fini_pg(struct radeon_device *rdev)
5555 {
5556 	if (rdev->pg_flags) {
5557 		si_enable_dma_pg(rdev, false);
5558 		si_enable_gfx_cgpg(rdev, false);
5559 	}
5560 }
5561 
5562 /*
5563  * RLC
5564  */
5565 void si_rlc_reset(struct radeon_device *rdev)
5566 {
5567 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5568 
5569 	tmp |= SOFT_RESET_RLC;
5570 	WREG32(GRBM_SOFT_RESET, tmp);
5571 	udelay(50);
5572 	tmp &= ~SOFT_RESET_RLC;
5573 	WREG32(GRBM_SOFT_RESET, tmp);
5574 	udelay(50);
5575 }
5576 
/* Stop the RLC: disable it, mask GUI idle interrupts and wait for the
 * serdes to go idle. */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5585 
/* Start the RLC and re-enable GUI idle interrupts; the short delay lets
 * the RLC come up before anyone pokes it. */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5594 
5595 static bool si_lbpw_supported(struct radeon_device *rdev)
5596 {
5597 	u32 tmp;
5598 
5599 	/* Enable LBPW only for DDR3 */
5600 	tmp = RREG32(MC_SEQ_MISC0);
5601 	if ((tmp & 0xF0000000) == 0xB0000000)
5602 		return true;
5603 	return false;
5604 }
5605 
5606 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5607 {
5608 	u32 tmp;
5609 
5610 	tmp = RREG32(RLC_LB_CNTL);
5611 	if (enable)
5612 		tmp |= LOAD_BALANCE_ENABLE;
5613 	else
5614 		tmp &= ~LOAD_BALANCE_ENABLE;
5615 	WREG32(RLC_LB_CNTL, tmp);
5616 
5617 	if (!enable) {
5618 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5619 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5620 	}
5621 }
5622 
/* si_rlc_resume - bring up the RLC: stop/reset it, run PG/CG init,
 * clear its runlist and load-balance state, upload the microcode and
 * start it.  Returns -EINVAL when no RLC firmware is loaded. */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* clear runlist and load-balance state */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* upload the microcode; firmware words are big-endian */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5662 
5663 static void si_enable_interrupts(struct radeon_device *rdev)
5664 {
5665 	u32 ih_cntl = RREG32(IH_CNTL);
5666 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5667 
5668 	ih_cntl |= ENABLE_INTR;
5669 	ih_rb_cntl |= IH_RB_ENABLE;
5670 	WREG32(IH_CNTL, ih_cntl);
5671 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5672 	rdev->ih.enabled = true;
5673 }
5674 
5675 static void si_disable_interrupts(struct radeon_device *rdev)
5676 {
5677 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5678 	u32 ih_cntl = RREG32(IH_CNTL);
5679 
5680 	ih_rb_cntl &= ~IH_RB_ENABLE;
5681 	ih_cntl &= ~ENABLE_INTR;
5682 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5683 	WREG32(IH_CNTL, ih_cntl);
5684 	/* set rptr, wptr to 0 */
5685 	WREG32(IH_RB_RPTR, 0);
5686 	WREG32(IH_RB_WPTR, 0);
5687 	rdev->ih.enabled = false;
5688 	rdev->ih.rptr = 0;
5689 }
5690 
/* Mask every interrupt source (CP rings, DMA engines, GRBM, CRTCs,
 * pageflips, DACs, HPD) so the chip raises nothing until si_irq_set()
 * re-enables what is wanted.  HPD polarity bits are preserved. */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* keep only the context busy/empty enables on ring 0 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* vblank/vline interrupts, per CRTC pair */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip interrupts, per CRTC pair */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

		/* mask HPD interrupts but keep the configured polarity */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5748 
/* si_irq_init - one-time interrupt handler bring-up.
 *
 * Allocates the IH ring, resumes the RLC, programs the IH ring buffer
 * (base, size, writeback address, wptr overflow handling), leaves every
 * interrupt source masked and finally enables the IH.  Returns 0 on
 * success or a negative error code (ring alloc / RLC resume failure).
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_enable_busmaster(rdev->dev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5819 
/**
 * si_irq_set - program the interrupt enable masks from driver state
 *
 * Reads the current enable state from rdev->irq (ring interrupts,
 * vblank, pageflip, hotplug, thermal) and writes the corresponding
 * enable bits to the hardware.  Masks not requested are written back
 * disabled.  Requires an installed IRQ handler; if the IH is disabled
 * everything is forced off instead.
 *
 * Returns 0 on success, -EINVAL if no IRQ handler is installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* preserve context busy/empty bits, clear everything else */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	/* read-modify-write: start from current HPD control with enable cleared */
	if (!ASIC_IS_NODCE(rdev)) {
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank interrupts: wanted if either the vblank path or a pending
	 * pageflip on the crtc needs them */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	/* hotplug detect interrupts */
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	/* commit the accumulated masks to the hardware */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pageflip interrupts are always left enabled on present crtcs */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	return 0;
}
5996 
5997 static inline void si_irq_ack(struct radeon_device *rdev)
5998 {
5999 	u32 tmp;
6000 
6001 	if (ASIC_IS_NODCE(rdev))
6002 		return;
6003 
6004 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6005 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6006 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6007 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6008 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6009 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6010 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6011 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6012 	if (rdev->num_crtc >= 4) {
6013 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6014 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6015 	}
6016 	if (rdev->num_crtc >= 6) {
6017 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6018 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6019 	}
6020 
6021 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6022 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6023 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6024 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6025 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6026 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6027 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6028 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6029 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6030 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6031 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6032 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6033 
6034 	if (rdev->num_crtc >= 4) {
6035 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6036 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6037 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6038 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6039 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6040 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6041 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6042 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6043 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6044 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6045 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6046 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6047 	}
6048 
6049 	if (rdev->num_crtc >= 6) {
6050 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6051 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6052 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6053 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6054 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6055 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6056 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6057 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6058 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6059 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6060 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6061 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6062 	}
6063 
6064 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6065 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6066 		tmp |= DC_HPDx_INT_ACK;
6067 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6068 	}
6069 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6070 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6071 		tmp |= DC_HPDx_INT_ACK;
6072 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6073 	}
6074 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6075 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6076 		tmp |= DC_HPDx_INT_ACK;
6077 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6078 	}
6079 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6080 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6081 		tmp |= DC_HPDx_INT_ACK;
6082 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6083 	}
6084 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6085 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6086 		tmp |= DC_HPDx_INT_ACK;
6087 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6088 	}
6089 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6090 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6091 		tmp |= DC_HPDx_INT_ACK;
6092 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6093 	}
6094 }
6095 
/**
 * si_irq_disable - shut down interrupt delivery
 *
 * Disables the IH, waits briefly for in-flight interrupts to land,
 * acknowledges anything still pending, then forces every interrupt
 * source off.  Call order matters: ack after the delay so late
 * interrupts are not left latched.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6104 
/**
 * si_irq_suspend - disable interrupts and stop the RLC for suspend
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6110 
/**
 * si_irq_fini - tear down the interrupt path and free the IH ring
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6116 
/**
 * si_get_ih_wptr - fetch the current IH ring write pointer
 *
 * Reads the write pointer from the writeback buffer when enabled
 * (cheaper than an MMIO read), else from the IH_RB_WPTR register.
 * On ring overflow, advances the software read pointer past the
 * overwritten entries and clears the overflow flag in IH_RB_CNTL.
 *
 * Returns the write pointer masked to the ring size (in bytes).
 */
static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happens, start parsing interrupts
		 * from the last not-overwritten vector (wptr + 16).  Hopefully
		 * this should allow us to catch up.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
6141 
6142 /*        SI IV Ring
6143  * Each IV ring entry is 128 bits:
6144  * [7:0]    - interrupt source id
6145  * [31:8]   - reserved
6146  * [59:32]  - interrupt source data
6147  * [63:60]  - reserved
6148  * [71:64]  - RINGID
6149  * [79:72]  - VMID
6150  * [127:80] - reserved
6151  */
/**
 * si_irq_process - IH ring interrupt dispatcher
 *
 * Walks the IH ring from rptr to wptr, decoding each 16-byte IV
 * entry (see the ring-layout comment above) and dispatching on the
 * source id: vblank/vline per crtc, pageflips, hotplug, UVD, VM
 * protection faults, CP/DMA fence completions and thermal events.
 * Deferred work (hotplug, thermal) is queued to the taskqueue rather
 * than handled in interrupt context.  A simple atomic flag serializes
 * concurrent callers, and the wptr is re-checked after releasing it
 * in case more entries arrived while processing.
 *
 * Returns IRQ_HANDLED if entries were processed, IRQ_NONE otherwise.
 */
irqreturn_t si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_vblank(rdev, 0);
					/* clear the latched status so it is not re-handled */
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_vblank(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_vblank(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_vblank(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_vblank(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_vblank(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 8: /* D1 page flip */
		case 10: /* D2 page flip */
		case 12: /* D3 page flip */
		case 14: /* D4 page flip */
		case 16: /* D5 page flip */
		case 18: /* D6 page flip */
			/* flip src ids are 8,10,...,18 -> crtc = (src_id - 8) / 2 */
			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
			if (radeon_use_pflipirq > 0)
				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 124: /* UVD */
			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
			break;
		case 146:
		case 147:
			/* VM protection fault: decode and log, then re-arm */
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			if (addr == 0x0 && status == 0x0)
				break;
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
		WREG32(IH_RB_RPTR, rptr);
	}
	/* hand deferred work off to process context */
	if (queue_hotplug)
		taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		taskqueue_enqueue(rdev->tq, &rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6491 
6492 /*
6493  * startup/shutdown callbacks
6494  */
/**
 * si_startup - bring the SI GPU to an operational state
 *
 * Ordered bring-up: PCIe link / ASPM, VRAM scratch, MC programming
 * and microcode, GART, GPU core init, RLC buffers, writeback buffer,
 * fence rings, UVD, IRQs, CP/DMA ring init and microcode, IB pool,
 * VM manager and audio.  Each step depends on the previous ones;
 * do not reorder.  Used both at init and resume time.
 *
 * Returns 0 on success, negative error code on first failure
 * (partial teardown is left to the caller / si_fini paths).
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* with DPM enabled, the SMC loads the MC ucode itself */
	if (!rdev->pm.dpm_enabled) {
		r = si_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start fence processing for each ring: 3 CP, 2 DMA, optional UVD */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	if (rdev->has_uvd) {
		r = uvd_v2_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		/* UVD failure is non-fatal: disable its ring and continue */
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
					     RADEON_CP_PACKET2);
			if (!r)
				r = uvd_v1_0_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6671 
/**
 * si_resume - resume the GPU after suspend
 *
 * Re-posts the card via ATOM, restores golden registers, resumes
 * power management (DPM only) and re-runs the full startup sequence.
 * On failure, accel_working is cleared so callers fall back to
 * non-accelerated operation.
 *
 * Returns 0 on success, negative error code from si_startup().
 */
int si_resume(struct radeon_device *rdev)
{
	int r;

	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
	 * posting will perform necessary task to bring back GPU into good
	 * shape.
	 */
	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	si_init_golden_registers(rdev);

	if (rdev->pm.pm_method == PM_METHOD_DPM)
		radeon_pm_resume(rdev);

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		DRM_ERROR("si startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;

}
6700 
/**
 * si_suspend - quiesce the GPU for suspend
 *
 * Tears down in reverse bring-up order: power management, audio,
 * VM manager, CP and DMA engines, UVD, powergating/clockgating,
 * interrupts, writeback and finally the GART.  Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6719 
/* The plan is to move initialization into this function and to use
 * helper functions so that radeon_device_init does little more than
 * call the ASIC-specific function.  This should also allow us to
 * remove a number of callback functions, such as vram_info.
 */
6726 int si_init(struct radeon_device *rdev)
6727 {
6728 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6729 	int r;
6730 
6731 	/* Read BIOS */
6732 	if (!radeon_get_bios(rdev)) {
6733 		if (ASIC_IS_AVIVO(rdev))
6734 			return -EINVAL;
6735 	}
6736 	/* Must be an ATOMBIOS */
6737 	if (!rdev->is_atom_bios) {
6738 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6739 		return -EINVAL;
6740 	}
6741 	r = radeon_atombios_init(rdev);
6742 	if (r)
6743 		return r;
6744 
6745 	/* Post card if necessary */
6746 	if (!radeon_card_posted(rdev)) {
6747 		if (!rdev->bios) {
6748 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6749 			return -EINVAL;
6750 		}
6751 		DRM_INFO("GPU not posted. posting now...\n");
6752 		atom_asic_init(rdev->mode_info.atom_context);
6753 	}
6754 	/* init golden registers */
6755 	si_init_golden_registers(rdev);
6756 	/* Initialize scratch registers */
6757 	si_scratch_init(rdev);
6758 	/* Initialize surface registers */
6759 	radeon_surface_init(rdev);
6760 	/* Initialize clocks */
6761 	radeon_get_clock_info(rdev->ddev);
6762 
6763 	/* Fence driver */
6764 	r = radeon_fence_driver_init(rdev);
6765 	if (r)
6766 		return r;
6767 
6768 	/* initialize memory controller */
6769 	r = si_mc_init(rdev);
6770 	if (r)
6771 		return r;
6772 	/* Memory manager */
6773 	r = radeon_bo_init(rdev);
6774 	if (r)
6775 		return r;
6776 
6777 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6778 	    !rdev->rlc_fw || !rdev->mc_fw) {
6779 		r = si_init_microcode(rdev);
6780 		if (r) {
6781 			DRM_ERROR("Failed to load firmware!\n");
6782 			return r;
6783 		}
6784 	}
6785 
6786 	/* Initialize power management */
6787 	radeon_pm_init(rdev);
6788 
6789 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6790 	ring->ring_obj = NULL;
6791 	r600_ring_init(rdev, ring, 1024 * 1024);
6792 
6793 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6794 	ring->ring_obj = NULL;
6795 	r600_ring_init(rdev, ring, 1024 * 1024);
6796 
6797 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6798 	ring->ring_obj = NULL;
6799 	r600_ring_init(rdev, ring, 1024 * 1024);
6800 
6801 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6802 	ring->ring_obj = NULL;
6803 	r600_ring_init(rdev, ring, 64 * 1024);
6804 
6805 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6806 	ring->ring_obj = NULL;
6807 	r600_ring_init(rdev, ring, 64 * 1024);
6808 
6809 	if (rdev->has_uvd) {
6810 		r = radeon_uvd_init(rdev);
6811 		if (!r) {
6812 			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6813 			ring->ring_obj = NULL;
6814 			r600_ring_init(rdev, ring, 4096);
6815 		}
6816 	}
6817 
6818 	rdev->ih.ring_obj = NULL;
6819 	r600_ih_ring_init(rdev, 64 * 1024);
6820 
6821 	r = r600_pcie_gart_init(rdev);
6822 	if (r)
6823 		return r;
6824 
6825 	rdev->accel_working = true;
6826 	r = si_startup(rdev);
6827 	if (r) {
6828 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6829 		si_cp_fini(rdev);
6830 		cayman_dma_fini(rdev);
6831 		si_irq_fini(rdev);
6832 		sumo_rlc_fini(rdev);
6833 		radeon_wb_fini(rdev);
6834 		radeon_ib_pool_fini(rdev);
6835 		radeon_vm_manager_fini(rdev);
6836 		radeon_irq_kms_fini(rdev);
6837 		si_pcie_gart_fini(rdev);
6838 		rdev->accel_working = false;
6839 	}
6840 
6841 	/* Don't start up if the MC ucode is missing.
6842 	 * The default clocks and voltages before the MC ucode
6843 	 * is loaded are not suffient for advanced operations.
6844 	 */
6845 	if (!rdev->mc_fw) {
6846 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
6847 		return -EINVAL;
6848 	}
6849 
6850 	return 0;
6851 }
6852 
/**
 * si_fini - final asic teardown (driver unload)
 *
 * @rdev: radeon_device pointer
 *
 * Tears down everything si_init()/si_startup() set up, in reverse:
 * pm, CP/DMA engines, power and clock gating, interrupts, RLC,
 * writeback, VM manager, IB pool, KMS irqs, UVD, GART, VRAM scratch,
 * GEM, fences, the bo manager, atombios state and firmware images.
 * Finally frees the BIOS copy.
 */
void si_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	si_fini_microcode(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6880 
6881 /**
6882  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6883  *
6884  * @rdev: radeon_device pointer
6885  *
6886  * Fetches a GPU clock counter snapshot (SI).
6887  * Returns the 64 bit clock counter snapshot.
6888  */
6889 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6890 {
6891 	uint64_t clock;
6892 
6893 	spin_lock(&rdev->gpu_clock_mutex);
6894 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6895 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6896 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6897 	spin_unlock(&rdev->gpu_clock_mutex);
6898 	return clock;
6899 }
6900 
/**
 * si_set_uvd_clocks - program the UVD PLL for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock (presumably in 10 kHz units like the
 *        other radeon clock interfaces — TODO confirm against callers)
 * @dclk: requested UVD decode clock (same units as @vclk)
 *
 * Switches VCLK/DCLK to the bypass clock, reprograms the UPLL with the
 * computed feedback and post dividers, then switches back to the PLL
 * outputs.  If either requested clock is 0, the PLL is simply left
 * asleep in bypass mode.  The exact register write order and delays
 * follow the hw programming sequence and must not be reordered.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* compute fb/post dividers within the UPLL's VCO and divider limits */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	/* handshake with the SMC before touching the dividers */
	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* ISPARE9 selection depends on the resulting VCO range */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6991 
/**
 * si_pcie_gen3_enable - try to raise the PCIE link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * Checks the speeds supported by the platform and, if the link is
 * currently slower, performs the link retrain sequence: for gen3 this
 * includes the equalization redo loop between the GPU and its upstream
 * bridge, then the target link speed is written to Link Control 2 and
 * a speed change is initiated.  Silently returns for IGP parts,
 * non-PCIE buses, radeon.pcie_gen2=0, or when already at target speed.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* data rate: 0 = gen1 (2.5GT/s), 1 = gen2 (5GT/s), 2 = gen3 (8GT/s) */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* both the upstream bridge and the GPU must expose a PCIe capability */
	bridge_pos = pci_get_pciecap_ptr(root->dev);
	if (!bridge_pos)
		return;

	gpu_pos = pci_get_pciecap_ptr(rdev->pdev->dev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save and force HAWD (hw autonomous width disable) on both ends */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate back to the maximum detected width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* redo equalization up to 10 times or until no transaction pending */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl: restore the saved HAWD bits on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2: restore bits 4 (enter compliance) and 9-11
				 * (compliance preset/de-emphasis) from the saved values */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed field (bits 3:0) of Link Control 2 */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hw to clear the initiate bit (speed change done) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7148 
/**
 * si_program_aspm - configure PCIE ASPM (active state power management)
 *
 * @rdev: radeon_device pointer
 *
 * Conditions the PCIE LC/PIF/PHY registers for L0s and L1 entry and,
 * where the platform would support CLKREQ#, for PLL power-down in L1.
 * On this platform the CLKREQ capability probe is compiled out
 * (see the zMN_TODO block below), so clk_req_support is always false
 * and the CLKREQ-dependent clock reprogramming is skipped.
 * No-op when radeon.aspm=0 or on non-PCIE parts.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of fast training sequences transmitted */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity settings; only written below */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in the OFF/TXS2 states */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* clear the PLL ramp-up times on everything except Oland/Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			if (!disable_clkreq) {
#ifdef zMN_TODO
				/* upstream probes the root port's LNKCAP for CLKPM;
				 * not ported to this platform yet, so assume no support */
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
#else
				clk_req_support = false;
#endif
			} else {
				clk_req_support = false;
			}

			/* dead code on this platform: clk_req_support is always false */
			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: just commit the L0s/PMI settings built above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* back out L0s if the N_FTS field saturated and the link
		 * is reversed in both directions */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7356