xref: /dragonfly/sys/dev/drm/radeon/si.c (revision 73610d44)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include <uapi_drm/radeon_drm.h>
30 #include "sid.h"
31 #include "atom.h"
32 #include "si_blit_shaders.h"
33 #include "clearstate_si.h"
34 #include "radeon_ucode.h"
35 
36 
/*
 * Firmware images requested at runtime for each supported Southern
 * Islands ASIC family (pfp/me/ce CP microcode, MC, RLC, SMC).
 */
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");
MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");
MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
72 
/* Forward declarations for static helpers defined later in this file. */
static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);
83 
84 static const u32 verde_rlc_save_restore_register_list[] =
85 {
86 	(0x8000 << 16) | (0x98f4 >> 2),
87 	0x00000000,
88 	(0x8040 << 16) | (0x98f4 >> 2),
89 	0x00000000,
90 	(0x8000 << 16) | (0xe80 >> 2),
91 	0x00000000,
92 	(0x8040 << 16) | (0xe80 >> 2),
93 	0x00000000,
94 	(0x8000 << 16) | (0x89bc >> 2),
95 	0x00000000,
96 	(0x8040 << 16) | (0x89bc >> 2),
97 	0x00000000,
98 	(0x8000 << 16) | (0x8c1c >> 2),
99 	0x00000000,
100 	(0x8040 << 16) | (0x8c1c >> 2),
101 	0x00000000,
102 	(0x9c00 << 16) | (0x98f0 >> 2),
103 	0x00000000,
104 	(0x9c00 << 16) | (0xe7c >> 2),
105 	0x00000000,
106 	(0x8000 << 16) | (0x9148 >> 2),
107 	0x00000000,
108 	(0x8040 << 16) | (0x9148 >> 2),
109 	0x00000000,
110 	(0x9c00 << 16) | (0x9150 >> 2),
111 	0x00000000,
112 	(0x9c00 << 16) | (0x897c >> 2),
113 	0x00000000,
114 	(0x9c00 << 16) | (0x8d8c >> 2),
115 	0x00000000,
116 	(0x9c00 << 16) | (0xac54 >> 2),
117 	0X00000000,
118 	0x3,
119 	(0x9c00 << 16) | (0x98f8 >> 2),
120 	0x00000000,
121 	(0x9c00 << 16) | (0x9910 >> 2),
122 	0x00000000,
123 	(0x9c00 << 16) | (0x9914 >> 2),
124 	0x00000000,
125 	(0x9c00 << 16) | (0x9918 >> 2),
126 	0x00000000,
127 	(0x9c00 << 16) | (0x991c >> 2),
128 	0x00000000,
129 	(0x9c00 << 16) | (0x9920 >> 2),
130 	0x00000000,
131 	(0x9c00 << 16) | (0x9924 >> 2),
132 	0x00000000,
133 	(0x9c00 << 16) | (0x9928 >> 2),
134 	0x00000000,
135 	(0x9c00 << 16) | (0x992c >> 2),
136 	0x00000000,
137 	(0x9c00 << 16) | (0x9930 >> 2),
138 	0x00000000,
139 	(0x9c00 << 16) | (0x9934 >> 2),
140 	0x00000000,
141 	(0x9c00 << 16) | (0x9938 >> 2),
142 	0x00000000,
143 	(0x9c00 << 16) | (0x993c >> 2),
144 	0x00000000,
145 	(0x9c00 << 16) | (0x9940 >> 2),
146 	0x00000000,
147 	(0x9c00 << 16) | (0x9944 >> 2),
148 	0x00000000,
149 	(0x9c00 << 16) | (0x9948 >> 2),
150 	0x00000000,
151 	(0x9c00 << 16) | (0x994c >> 2),
152 	0x00000000,
153 	(0x9c00 << 16) | (0x9950 >> 2),
154 	0x00000000,
155 	(0x9c00 << 16) | (0x9954 >> 2),
156 	0x00000000,
157 	(0x9c00 << 16) | (0x9958 >> 2),
158 	0x00000000,
159 	(0x9c00 << 16) | (0x995c >> 2),
160 	0x00000000,
161 	(0x9c00 << 16) | (0x9960 >> 2),
162 	0x00000000,
163 	(0x9c00 << 16) | (0x9964 >> 2),
164 	0x00000000,
165 	(0x9c00 << 16) | (0x9968 >> 2),
166 	0x00000000,
167 	(0x9c00 << 16) | (0x996c >> 2),
168 	0x00000000,
169 	(0x9c00 << 16) | (0x9970 >> 2),
170 	0x00000000,
171 	(0x9c00 << 16) | (0x9974 >> 2),
172 	0x00000000,
173 	(0x9c00 << 16) | (0x9978 >> 2),
174 	0x00000000,
175 	(0x9c00 << 16) | (0x997c >> 2),
176 	0x00000000,
177 	(0x9c00 << 16) | (0x9980 >> 2),
178 	0x00000000,
179 	(0x9c00 << 16) | (0x9984 >> 2),
180 	0x00000000,
181 	(0x9c00 << 16) | (0x9988 >> 2),
182 	0x00000000,
183 	(0x9c00 << 16) | (0x998c >> 2),
184 	0x00000000,
185 	(0x9c00 << 16) | (0x8c00 >> 2),
186 	0x00000000,
187 	(0x9c00 << 16) | (0x8c14 >> 2),
188 	0x00000000,
189 	(0x9c00 << 16) | (0x8c04 >> 2),
190 	0x00000000,
191 	(0x9c00 << 16) | (0x8c08 >> 2),
192 	0x00000000,
193 	(0x8000 << 16) | (0x9b7c >> 2),
194 	0x00000000,
195 	(0x8040 << 16) | (0x9b7c >> 2),
196 	0x00000000,
197 	(0x8000 << 16) | (0xe84 >> 2),
198 	0x00000000,
199 	(0x8040 << 16) | (0xe84 >> 2),
200 	0x00000000,
201 	(0x8000 << 16) | (0x89c0 >> 2),
202 	0x00000000,
203 	(0x8040 << 16) | (0x89c0 >> 2),
204 	0x00000000,
205 	(0x8000 << 16) | (0x914c >> 2),
206 	0x00000000,
207 	(0x8040 << 16) | (0x914c >> 2),
208 	0x00000000,
209 	(0x8000 << 16) | (0x8c20 >> 2),
210 	0x00000000,
211 	(0x8040 << 16) | (0x8c20 >> 2),
212 	0x00000000,
213 	(0x8000 << 16) | (0x9354 >> 2),
214 	0x00000000,
215 	(0x8040 << 16) | (0x9354 >> 2),
216 	0x00000000,
217 	(0x9c00 << 16) | (0x9060 >> 2),
218 	0x00000000,
219 	(0x9c00 << 16) | (0x9364 >> 2),
220 	0x00000000,
221 	(0x9c00 << 16) | (0x9100 >> 2),
222 	0x00000000,
223 	(0x9c00 << 16) | (0x913c >> 2),
224 	0x00000000,
225 	(0x8000 << 16) | (0x90e0 >> 2),
226 	0x00000000,
227 	(0x8000 << 16) | (0x90e4 >> 2),
228 	0x00000000,
229 	(0x8000 << 16) | (0x90e8 >> 2),
230 	0x00000000,
231 	(0x8040 << 16) | (0x90e0 >> 2),
232 	0x00000000,
233 	(0x8040 << 16) | (0x90e4 >> 2),
234 	0x00000000,
235 	(0x8040 << 16) | (0x90e8 >> 2),
236 	0x00000000,
237 	(0x9c00 << 16) | (0x8bcc >> 2),
238 	0x00000000,
239 	(0x9c00 << 16) | (0x8b24 >> 2),
240 	0x00000000,
241 	(0x9c00 << 16) | (0x88c4 >> 2),
242 	0x00000000,
243 	(0x9c00 << 16) | (0x8e50 >> 2),
244 	0x00000000,
245 	(0x9c00 << 16) | (0x8c0c >> 2),
246 	0x00000000,
247 	(0x9c00 << 16) | (0x8e58 >> 2),
248 	0x00000000,
249 	(0x9c00 << 16) | (0x8e5c >> 2),
250 	0x00000000,
251 	(0x9c00 << 16) | (0x9508 >> 2),
252 	0x00000000,
253 	(0x9c00 << 16) | (0x950c >> 2),
254 	0x00000000,
255 	(0x9c00 << 16) | (0x9494 >> 2),
256 	0x00000000,
257 	(0x9c00 << 16) | (0xac0c >> 2),
258 	0x00000000,
259 	(0x9c00 << 16) | (0xac10 >> 2),
260 	0x00000000,
261 	(0x9c00 << 16) | (0xac14 >> 2),
262 	0x00000000,
263 	(0x9c00 << 16) | (0xae00 >> 2),
264 	0x00000000,
265 	(0x9c00 << 16) | (0xac08 >> 2),
266 	0x00000000,
267 	(0x9c00 << 16) | (0x88d4 >> 2),
268 	0x00000000,
269 	(0x9c00 << 16) | (0x88c8 >> 2),
270 	0x00000000,
271 	(0x9c00 << 16) | (0x88cc >> 2),
272 	0x00000000,
273 	(0x9c00 << 16) | (0x89b0 >> 2),
274 	0x00000000,
275 	(0x9c00 << 16) | (0x8b10 >> 2),
276 	0x00000000,
277 	(0x9c00 << 16) | (0x8a14 >> 2),
278 	0x00000000,
279 	(0x9c00 << 16) | (0x9830 >> 2),
280 	0x00000000,
281 	(0x9c00 << 16) | (0x9834 >> 2),
282 	0x00000000,
283 	(0x9c00 << 16) | (0x9838 >> 2),
284 	0x00000000,
285 	(0x9c00 << 16) | (0x9a10 >> 2),
286 	0x00000000,
287 	(0x8000 << 16) | (0x9870 >> 2),
288 	0x00000000,
289 	(0x8000 << 16) | (0x9874 >> 2),
290 	0x00000000,
291 	(0x8001 << 16) | (0x9870 >> 2),
292 	0x00000000,
293 	(0x8001 << 16) | (0x9874 >> 2),
294 	0x00000000,
295 	(0x8040 << 16) | (0x9870 >> 2),
296 	0x00000000,
297 	(0x8040 << 16) | (0x9874 >> 2),
298 	0x00000000,
299 	(0x8041 << 16) | (0x9870 >> 2),
300 	0x00000000,
301 	(0x8041 << 16) | (0x9874 >> 2),
302 	0x00000000,
303 	0x00000000
304 };
305 
/* Golden RLC register settings for Tahiti — triplets, presumably
 * {offset, mask, value}; the consumer is not visible in this chunk. */
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};
315 
/* Golden register settings for Tahiti — triplets, presumably
 * {offset, mask, value}; the consumer is not visible in this chunk. */
static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};
350 
/* Second golden register table for Tahiti — same triplet layout. */
static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};
355 
/* Golden RLC register settings for Pitcairn — triplets, presumably
 * {offset, mask, value}; the consumer is not visible in this chunk. */
static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};
364 
/* Golden register settings for Pitcairn — triplets, presumably
 * {offset, mask, value}; the consumer is not visible in this chunk. */
static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
395 
/* Golden RLC register settings for Verde — triplets, presumably
 * {offset, mask, value}; the consumer is not visible in this chunk. */
static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};
404 
/* Golden register settings for Verde — triplets, presumably
 * {offset, mask, value}.  Several entries are repeated verbatim; the
 * duplication matches the upstream Linux radeon table and is kept
 * as-is (the consumer is not visible in this chunk). */
static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
460 
/* Golden RLC register settings for Oland — triplets, presumably
 * {offset, mask, value}; the consumer is not visible in this chunk. */
static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};
469 
/* Golden register settings for Oland — triplets, presumably
 * {offset, mask, value}; the consumer is not visible in this chunk. */
static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
500 
/* Golden register settings for Hainan — triplets, presumably
 * {offset, mask, value}; the consumer is not visible in this chunk. */
static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};
529 
/* Second golden register table for Hainan — same triplet layout. */
static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};
534 
/* MGCG/CGCG (clock-gating) init triplets for Tahiti — presumably
 * {offset, mask, value}; the consumer is not visible in this chunk. */
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
664 
/* MGCG/CGCG (clock-gating) init triplets for Pitcairn — presumably
 * {offset, mask, value}; the consumer is not visible in this chunk. */
static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
762 
/* MGCG/CGCG (clock-gating) init triplets for Verde — presumably
 * {offset, mask, value}; the consumer is not visible in this chunk. */
static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
862 
/* MGCG/CGCG (clock-gating) init triplets for Oland — presumably
 * {offset, mask, value}; the consumer is not visible in this chunk. */
static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
942 
/*
 * Hainan medium/coarse-grain clock gating (MGCG/CGCG) init sequence.
 * Flat triples consumed by radeon_program_register_sequence() from
 * si_init_golden_registers(); each entry appears to be
 * {register offset, AND mask, OR value} — NOTE(review): triple layout
 * inferred from usage pattern, confirm against
 * radeon_program_register_sequence().
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1019 
1020 static u32 verde_pg_init[] =
1021 {
1022 	0x353c, 0xffffffff, 0x40000,
1023 	0x3538, 0xffffffff, 0x200010ff,
1024 	0x353c, 0xffffffff, 0x0,
1025 	0x353c, 0xffffffff, 0x0,
1026 	0x353c, 0xffffffff, 0x0,
1027 	0x353c, 0xffffffff, 0x0,
1028 	0x353c, 0xffffffff, 0x0,
1029 	0x353c, 0xffffffff, 0x7007,
1030 	0x3538, 0xffffffff, 0x300010ff,
1031 	0x353c, 0xffffffff, 0x0,
1032 	0x353c, 0xffffffff, 0x0,
1033 	0x353c, 0xffffffff, 0x0,
1034 	0x353c, 0xffffffff, 0x0,
1035 	0x353c, 0xffffffff, 0x0,
1036 	0x353c, 0xffffffff, 0x400000,
1037 	0x3538, 0xffffffff, 0x100010ff,
1038 	0x353c, 0xffffffff, 0x0,
1039 	0x353c, 0xffffffff, 0x0,
1040 	0x353c, 0xffffffff, 0x0,
1041 	0x353c, 0xffffffff, 0x0,
1042 	0x353c, 0xffffffff, 0x0,
1043 	0x353c, 0xffffffff, 0x120200,
1044 	0x3538, 0xffffffff, 0x500010ff,
1045 	0x353c, 0xffffffff, 0x0,
1046 	0x353c, 0xffffffff, 0x0,
1047 	0x353c, 0xffffffff, 0x0,
1048 	0x353c, 0xffffffff, 0x0,
1049 	0x353c, 0xffffffff, 0x0,
1050 	0x353c, 0xffffffff, 0x1e1e16,
1051 	0x3538, 0xffffffff, 0x600010ff,
1052 	0x353c, 0xffffffff, 0x0,
1053 	0x353c, 0xffffffff, 0x0,
1054 	0x353c, 0xffffffff, 0x0,
1055 	0x353c, 0xffffffff, 0x0,
1056 	0x353c, 0xffffffff, 0x0,
1057 	0x353c, 0xffffffff, 0x171f1e,
1058 	0x3538, 0xffffffff, 0x700010ff,
1059 	0x353c, 0xffffffff, 0x0,
1060 	0x353c, 0xffffffff, 0x0,
1061 	0x353c, 0xffffffff, 0x0,
1062 	0x353c, 0xffffffff, 0x0,
1063 	0x353c, 0xffffffff, 0x0,
1064 	0x353c, 0xffffffff, 0x0,
1065 	0x3538, 0xffffffff, 0x9ff,
1066 	0x3500, 0xffffffff, 0x0,
1067 	0x3504, 0xffffffff, 0x10000800,
1068 	0x3504, 0xffffffff, 0xf,
1069 	0x3504, 0xffffffff, 0xf,
1070 	0x3500, 0xffffffff, 0x4,
1071 	0x3504, 0xffffffff, 0x1000051e,
1072 	0x3504, 0xffffffff, 0xffff,
1073 	0x3504, 0xffffffff, 0xffff,
1074 	0x3500, 0xffffffff, 0x8,
1075 	0x3504, 0xffffffff, 0x80500,
1076 	0x3500, 0xffffffff, 0x12,
1077 	0x3504, 0xffffffff, 0x9050c,
1078 	0x3500, 0xffffffff, 0x1d,
1079 	0x3504, 0xffffffff, 0xb052c,
1080 	0x3500, 0xffffffff, 0x2a,
1081 	0x3504, 0xffffffff, 0x1053e,
1082 	0x3500, 0xffffffff, 0x2d,
1083 	0x3504, 0xffffffff, 0x10546,
1084 	0x3500, 0xffffffff, 0x30,
1085 	0x3504, 0xffffffff, 0xa054e,
1086 	0x3500, 0xffffffff, 0x3c,
1087 	0x3504, 0xffffffff, 0x1055f,
1088 	0x3500, 0xffffffff, 0x3f,
1089 	0x3504, 0xffffffff, 0x10567,
1090 	0x3500, 0xffffffff, 0x42,
1091 	0x3504, 0xffffffff, 0x1056f,
1092 	0x3500, 0xffffffff, 0x45,
1093 	0x3504, 0xffffffff, 0x10572,
1094 	0x3500, 0xffffffff, 0x48,
1095 	0x3504, 0xffffffff, 0x20575,
1096 	0x3500, 0xffffffff, 0x4c,
1097 	0x3504, 0xffffffff, 0x190801,
1098 	0x3500, 0xffffffff, 0x67,
1099 	0x3504, 0xffffffff, 0x1082a,
1100 	0x3500, 0xffffffff, 0x6a,
1101 	0x3504, 0xffffffff, 0x1b082d,
1102 	0x3500, 0xffffffff, 0x87,
1103 	0x3504, 0xffffffff, 0x310851,
1104 	0x3500, 0xffffffff, 0xba,
1105 	0x3504, 0xffffffff, 0x891,
1106 	0x3500, 0xffffffff, 0xbc,
1107 	0x3504, 0xffffffff, 0x893,
1108 	0x3500, 0xffffffff, 0xbe,
1109 	0x3504, 0xffffffff, 0x20895,
1110 	0x3500, 0xffffffff, 0xc2,
1111 	0x3504, 0xffffffff, 0x20899,
1112 	0x3500, 0xffffffff, 0xc6,
1113 	0x3504, 0xffffffff, 0x2089d,
1114 	0x3500, 0xffffffff, 0xca,
1115 	0x3504, 0xffffffff, 0x8a1,
1116 	0x3500, 0xffffffff, 0xcc,
1117 	0x3504, 0xffffffff, 0x8a3,
1118 	0x3500, 0xffffffff, 0xce,
1119 	0x3504, 0xffffffff, 0x308a5,
1120 	0x3500, 0xffffffff, 0xd3,
1121 	0x3504, 0xffffffff, 0x6d08cd,
1122 	0x3500, 0xffffffff, 0x142,
1123 	0x3504, 0xffffffff, 0x2000095a,
1124 	0x3504, 0xffffffff, 0x1,
1125 	0x3500, 0xffffffff, 0x144,
1126 	0x3504, 0xffffffff, 0x301f095b,
1127 	0x3500, 0xffffffff, 0x165,
1128 	0x3504, 0xffffffff, 0xc094d,
1129 	0x3500, 0xffffffff, 0x173,
1130 	0x3504, 0xffffffff, 0xf096d,
1131 	0x3500, 0xffffffff, 0x184,
1132 	0x3504, 0xffffffff, 0x15097f,
1133 	0x3500, 0xffffffff, 0x19b,
1134 	0x3504, 0xffffffff, 0xc0998,
1135 	0x3500, 0xffffffff, 0x1a9,
1136 	0x3504, 0xffffffff, 0x409a7,
1137 	0x3500, 0xffffffff, 0x1af,
1138 	0x3504, 0xffffffff, 0xcdc,
1139 	0x3500, 0xffffffff, 0x1b1,
1140 	0x3504, 0xffffffff, 0x800,
1141 	0x3508, 0xffffffff, 0x6c9b2000,
1142 	0x3510, 0xfc00, 0x2000,
1143 	0x3544, 0xffffffff, 0xfc0,
1144 	0x28d4, 0x00000100, 0x100
1145 };
1146 
1147 static void si_init_golden_registers(struct radeon_device *rdev)
1148 {
1149 	switch (rdev->family) {
1150 	case CHIP_TAHITI:
1151 		radeon_program_register_sequence(rdev,
1152 						 tahiti_golden_registers,
1153 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1154 		radeon_program_register_sequence(rdev,
1155 						 tahiti_golden_rlc_registers,
1156 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1157 		radeon_program_register_sequence(rdev,
1158 						 tahiti_mgcg_cgcg_init,
1159 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1160 		radeon_program_register_sequence(rdev,
1161 						 tahiti_golden_registers2,
1162 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1163 		break;
1164 	case CHIP_PITCAIRN:
1165 		radeon_program_register_sequence(rdev,
1166 						 pitcairn_golden_registers,
1167 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1168 		radeon_program_register_sequence(rdev,
1169 						 pitcairn_golden_rlc_registers,
1170 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1171 		radeon_program_register_sequence(rdev,
1172 						 pitcairn_mgcg_cgcg_init,
1173 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1174 		break;
1175 	case CHIP_VERDE:
1176 		radeon_program_register_sequence(rdev,
1177 						 verde_golden_registers,
1178 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1179 		radeon_program_register_sequence(rdev,
1180 						 verde_golden_rlc_registers,
1181 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1182 		radeon_program_register_sequence(rdev,
1183 						 verde_mgcg_cgcg_init,
1184 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1185 		radeon_program_register_sequence(rdev,
1186 						 verde_pg_init,
1187 						 (const u32)ARRAY_SIZE(verde_pg_init));
1188 		break;
1189 	case CHIP_OLAND:
1190 		radeon_program_register_sequence(rdev,
1191 						 oland_golden_registers,
1192 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1193 		radeon_program_register_sequence(rdev,
1194 						 oland_golden_rlc_registers,
1195 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1196 		radeon_program_register_sequence(rdev,
1197 						 oland_mgcg_cgcg_init,
1198 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1199 		break;
1200 	case CHIP_HAINAN:
1201 		radeon_program_register_sequence(rdev,
1202 						 hainan_golden_registers,
1203 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1204 		radeon_program_register_sequence(rdev,
1205 						 hainan_golden_registers2,
1206 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1207 		radeon_program_register_sequence(rdev,
1208 						 hainan_mgcg_cgcg_init,
1209 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1210 		break;
1211 	default:
1212 		break;
1213 	}
1214 }
1215 
/* Fixed TCLK rate returned by si_get_xclk() when TCLK is muxed onto XCLK */
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)
1218 
1219 /**
1220  * si_get_xclk - get the xclk
1221  *
1222  * @rdev: radeon_device pointer
1223  *
1224  * Returns the reference clock used by the gfx engine
1225  * (SI).
1226  */
1227 u32 si_get_xclk(struct radeon_device *rdev)
1228 {
1229         u32 reference_clock = rdev->clock.spll.reference_freq;
1230 	u32 tmp;
1231 
1232 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1233 	if (tmp & MUX_TCLK_TO_XCLK)
1234 		return TCLK;
1235 
1236 	tmp = RREG32(CG_CLKPIN_CNTL);
1237 	if (tmp & XTALIN_DIVIDE)
1238 		return reference_clock / 4;
1239 
1240 	return reference_clock;
1241 }
1242 
1243 /* get temperature in millidegrees */
1244 int si_get_temp(struct radeon_device *rdev)
1245 {
1246 	u32 temp;
1247 	int actual_temp = 0;
1248 
1249 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1250 		CTF_TEMP_SHIFT;
1251 
1252 	if (temp & 0x200)
1253 		actual_temp = 255;
1254 	else
1255 		actual_temp = temp & 0x1ff;
1256 
1257 	actual_temp = (actual_temp * 1000);
1258 
1259 	return actual_temp;
1260 }
1261 
/* Number of {index, data} pairs in each *_io_mc_regs table below */
#define TAHITI_IO_MC_REGS_SIZE 36
1263 
/*
 * Tahiti MC IO setup: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs
 * written by si_mc_load_microcode() before loading the MC ucode.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1302 
/*
 * Pitcairn MC IO setup: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs; identical to the Tahiti table except for the final entry.
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1341 
/*
 * Verde MC IO setup: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs; identical to the Tahiti table except for the final entry.
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1380 
/*
 * Oland MC IO setup: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs; identical to the Tahiti table except for the final entry.
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1419 
/*
 * Hainan MC IO setup: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pairs; identical to the Tahiti table except for the final entry.
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1458 
1459 /* ucode loading */
1460 int si_mc_load_microcode(struct radeon_device *rdev)
1461 {
1462 	const __be32 *fw_data;
1463 	u32 running, blackout = 0;
1464 	u32 *io_mc_regs;
1465 	int i, regs_size, ucode_size;
1466 
1467 	if (!rdev->mc_fw)
1468 		return -EINVAL;
1469 
1470 	ucode_size = rdev->mc_fw->datasize / 4;
1471 
1472 	switch (rdev->family) {
1473 	case CHIP_TAHITI:
1474 		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1475 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1476 		break;
1477 	case CHIP_PITCAIRN:
1478 		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1479 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1480 		break;
1481 	case CHIP_VERDE:
1482 	default:
1483 		io_mc_regs = (u32 *)&verde_io_mc_regs;
1484 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1485 		break;
1486 	case CHIP_OLAND:
1487 		io_mc_regs = (u32 *)&oland_io_mc_regs;
1488 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1489 		break;
1490 	case CHIP_HAINAN:
1491 		io_mc_regs = (u32 *)&hainan_io_mc_regs;
1492 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1493 		break;
1494 	}
1495 
1496 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1497 
1498 	if (running == 0) {
1499 		if (running) {
1500 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1501 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1502 		}
1503 
1504 		/* reset the engine and set to writable */
1505 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1506 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1507 
1508 		/* load mc io regs */
1509 		for (i = 0; i < regs_size; i++) {
1510 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1511 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1512 		}
1513 		/* load the MC ucode */
1514 		fw_data = (const __be32 *)rdev->mc_fw->data;
1515 		for (i = 0; i < ucode_size; i++)
1516 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1517 
1518 		/* put the engine back into the active state */
1519 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1520 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1521 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1522 
1523 		/* wait for training to complete */
1524 		for (i = 0; i < rdev->usec_timeout; i++) {
1525 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1526 				break;
1527 			udelay(1);
1528 		}
1529 		for (i = 0; i < rdev->usec_timeout; i++) {
1530 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1531 				break;
1532 			udelay(1);
1533 		}
1534 
1535 		if (running)
1536 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1537 	}
1538 
1539 	return 0;
1540 }
1541 
1542 static int si_init_microcode(struct radeon_device *rdev)
1543 {
1544 	const char *chip_name;
1545 	const char *rlc_chip_name;
1546 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1547 	size_t smc_req_size, mc2_req_size;
1548 	char fw_name[30];
1549 	int err;
1550 
1551 	DRM_DEBUG("\n");
1552 
1553 	switch (rdev->family) {
1554 	case CHIP_TAHITI:
1555 		chip_name = "TAHITI";
1556 		rlc_chip_name = "TAHITI";
1557 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1558 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1559 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1560 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1561 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1562 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1563 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1564 		break;
1565 	case CHIP_PITCAIRN:
1566 		chip_name = "PITCAIRN";
1567 		rlc_chip_name = "PITCAIRN";
1568 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1569 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1570 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1571 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1572 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1573 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1574 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1575 		break;
1576 	case CHIP_VERDE:
1577 		chip_name = "VERDE";
1578 		rlc_chip_name = "VERDE";
1579 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1580 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1581 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1582 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1583 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1584 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1585 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1586 		break;
1587 	case CHIP_OLAND:
1588 		chip_name = "OLAND";
1589 		rlc_chip_name = "OLAND";
1590 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1591 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1592 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1593 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1594 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1595 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1596 		break;
1597 	case CHIP_HAINAN:
1598 		chip_name = "HAINAN";
1599 		rlc_chip_name = "HAINAN";
1600 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1601 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1602 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1603 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1604 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1605 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1606 		break;
1607 	default: BUG();
1608 	}
1609 
1610 	DRM_INFO("Loading %s Microcode\n", chip_name);
1611 
1612 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1613 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1614 	if (err)
1615 		goto out;
1616 	if (rdev->pfp_fw->datasize != pfp_req_size) {
1617 		printk(KERN_ERR
1618 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1619 		       rdev->pfp_fw->datasize, fw_name);
1620 		err = -EINVAL;
1621 		goto out;
1622 	}
1623 
1624 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
1625 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1626 	if (err)
1627 		goto out;
1628 	if (rdev->me_fw->datasize != me_req_size) {
1629 		printk(KERN_ERR
1630 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1631 		       rdev->me_fw->datasize, fw_name);
1632 		err = -EINVAL;
1633 	}
1634 
1635 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
1636 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1637 	if (err)
1638 		goto out;
1639 	if (rdev->ce_fw->datasize != ce_req_size) {
1640 		printk(KERN_ERR
1641 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1642 		       rdev->ce_fw->datasize, fw_name);
1643 		err = -EINVAL;
1644 	}
1645 
1646 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", rlc_chip_name);
1647 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1648 	if (err)
1649 		goto out;
1650 	if (rdev->rlc_fw->datasize != rlc_req_size) {
1651 		printk(KERN_ERR
1652 		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1653 		       rdev->rlc_fw->datasize, fw_name);
1654 		err = -EINVAL;
1655 	}
1656 
1657 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
1658 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1659 	if (err) {
1660 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
1661 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1662 		if (err)
1663 			goto out;
1664 	}
1665 	if ((rdev->mc_fw->datasize != mc_req_size) &&
1666 	    (rdev->mc_fw->datasize != mc2_req_size)) {
1667 		printk(KERN_ERR
1668 		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1669 		       rdev->mc_fw->datasize, fw_name);
1670 		err = -EINVAL;
1671 	}
1672 	DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
1673 
1674 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
1675 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1676 	if (err) {
1677 		printk(KERN_ERR
1678 		       "smc: error loading firmware \"%s\"\n",
1679 		       fw_name);
1680 		release_firmware(rdev->smc_fw);
1681 		rdev->smc_fw = NULL;
1682 		err = 0;
1683 	} else if (rdev->smc_fw->datasize != smc_req_size) {
1684 		printk(KERN_ERR
1685 		       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1686 		       rdev->smc_fw->datasize, fw_name);
1687 		err = -EINVAL;
1688 	}
1689 
1690 out:
1691 	if (err) {
1692 		if (err != -EINVAL)
1693 			printk(KERN_ERR
1694 			       "si_cp: Failed to load firmware \"%s\"\n",
1695 			       fw_name);
1696 		release_firmware(rdev->pfp_fw);
1697 		rdev->pfp_fw = NULL;
1698 		release_firmware(rdev->me_fw);
1699 		rdev->me_fw = NULL;
1700 		release_firmware(rdev->ce_fw);
1701 		rdev->ce_fw = NULL;
1702 		release_firmware(rdev->rlc_fw);
1703 		rdev->rlc_fw = NULL;
1704 		release_firmware(rdev->mc_fw);
1705 		rdev->mc_fw = NULL;
1706 		release_firmware(rdev->smc_fw);
1707 		rdev->smc_fw = NULL;
1708 	}
1709 	return err;
1710 }
1711 
1712 /**
1713  * si_fini_microcode - drop the firmwares image references
1714  *
1715  * @rdev: radeon_device pointer
1716  *
1717  * Drop the pfp, me, rlc, mc and ce firmware image references.
1718  * Called at driver shutdown.
1719  */
1720 static void si_fini_microcode(struct radeon_device *rdev)
1721 {
1722 	release_firmware(rdev->pfp_fw);
1723 	rdev->pfp_fw = NULL;
1724 	release_firmware(rdev->me_fw);
1725 	rdev->me_fw = NULL;
1726 	release_firmware(rdev->rlc_fw);
1727 	rdev->rlc_fw = NULL;
1728 	release_firmware(rdev->mc_fw);
1729 	rdev->mc_fw = NULL;
1730 	release_firmware(rdev->smc_fw);
1731 	rdev->smc_fw = NULL;
1732 	release_firmware(rdev->ce_fw);
1733 	rdev->ce_fw = NULL;
1734 }
1735 
1736 /* watermark setup */
/*
 * dce6_line_buffer_adjust - set up the line buffer split for one crtc
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the display controller being configured
 * @mode: the mode on this controller (NULL/disabled -> no allocation)
 * @other_mode: the mode on the paired controller sharing the line buffer
 *
 * Programs DC_LB_MEMORY_SPLIT and the DMIF buffer allocation for the
 * crtc, waits for the allocation to complete, and returns the effective
 * line buffer size for the watermark calculations (0 when disabled).
 */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The paritioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode) {
			tmp = 0; /* 1/2 */
			buffer_alloc = 1;
		} else {
			tmp = 2; /* whole */
			buffer_alloc = 2;
		}
	} else {
		/* crtc disabled: no line buffer, no DMIF buffers */
		tmp = 0;
		buffer_alloc = 0;
	}

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	/* request the DMIF buffers and poll until the hw acks */
	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		/* translate the split setting back into a buffer size */
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
1795 
1796 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1797 {
1798 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1799 
1800 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1801 	case 0:
1802 	default:
1803 		return 1;
1804 	case 1:
1805 		return 2;
1806 	case 2:
1807 		return 4;
1808 	case 3:
1809 		return 8;
1810 	case 4:
1811 		return 3;
1812 	case 5:
1813 		return 6;
1814 	case 6:
1815 		return 10;
1816 	case 7:
1817 		return 12;
1818 	case 8:
1819 		return 16;
1820 	}
1821 }
1822 
/* Inputs for the DCE6 display watermark/bandwidth calculations below. */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1838 
1839 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1840 {
1841 	/* Calculate raw DRAM Bandwidth */
1842 	fixed20_12 dram_efficiency; /* 0.7 */
1843 	fixed20_12 yclk, dram_channels, bandwidth;
1844 	fixed20_12 a;
1845 
1846 	a.full = dfixed_const(1000);
1847 	yclk.full = dfixed_const(wm->yclk);
1848 	yclk.full = dfixed_div(yclk, a);
1849 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1850 	a.full = dfixed_const(10);
1851 	dram_efficiency.full = dfixed_const(7);
1852 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1853 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1854 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1855 
1856 	return dfixed_trunc(bandwidth);
1857 }
1858 
1859 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1860 {
1861 	/* Calculate DRAM Bandwidth and the part allocated to display. */
1862 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1863 	fixed20_12 yclk, dram_channels, bandwidth;
1864 	fixed20_12 a;
1865 
1866 	a.full = dfixed_const(1000);
1867 	yclk.full = dfixed_const(wm->yclk);
1868 	yclk.full = dfixed_div(yclk, a);
1869 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1870 	a.full = dfixed_const(10);
1871 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1872 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1873 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1874 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1875 
1876 	return dfixed_trunc(bandwidth);
1877 }
1878 
1879 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1880 {
1881 	/* Calculate the display Data return Bandwidth */
1882 	fixed20_12 return_efficiency; /* 0.8 */
1883 	fixed20_12 sclk, bandwidth;
1884 	fixed20_12 a;
1885 
1886 	a.full = dfixed_const(1000);
1887 	sclk.full = dfixed_const(wm->sclk);
1888 	sclk.full = dfixed_div(sclk, a);
1889 	a.full = dfixed_const(10);
1890 	return_efficiency.full = dfixed_const(8);
1891 	return_efficiency.full = dfixed_div(return_efficiency, a);
1892 	a.full = dfixed_const(32);
1893 	bandwidth.full = dfixed_mul(a, sclk);
1894 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1895 
1896 	return dfixed_trunc(bandwidth);
1897 }
1898 
/* DMIF request size in bytes; fixed at 32 on DCE6. */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
1903 
1904 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1905 {
1906 	/* Calculate the DMIF Request Bandwidth */
1907 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1908 	fixed20_12 disp_clk, sclk, bandwidth;
1909 	fixed20_12 a, b1, b2;
1910 	u32 min_bandwidth;
1911 
1912 	a.full = dfixed_const(1000);
1913 	disp_clk.full = dfixed_const(wm->disp_clk);
1914 	disp_clk.full = dfixed_div(disp_clk, a);
1915 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1916 	b1.full = dfixed_mul(a, disp_clk);
1917 
1918 	a.full = dfixed_const(1000);
1919 	sclk.full = dfixed_const(wm->sclk);
1920 	sclk.full = dfixed_div(sclk, a);
1921 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1922 	b2.full = dfixed_mul(a, sclk);
1923 
1924 	a.full = dfixed_const(10);
1925 	disp_clk_request_efficiency.full = dfixed_const(8);
1926 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1927 
1928 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1929 
1930 	a.full = dfixed_const(min_bandwidth);
1931 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1932 
1933 	return dfixed_trunc(bandwidth);
1934 }
1935 
1936 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1937 {
1938 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1939 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1940 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1941 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1942 
1943 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1944 }
1945 
1946 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1947 {
1948 	/* Calculate the display mode Average Bandwidth
1949 	 * DisplayMode should contain the source and destination dimensions,
1950 	 * timing, etc.
1951 	 */
1952 	fixed20_12 bpp;
1953 	fixed20_12 line_time;
1954 	fixed20_12 src_width;
1955 	fixed20_12 bandwidth;
1956 	fixed20_12 a;
1957 
1958 	a.full = dfixed_const(1000);
1959 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1960 	line_time.full = dfixed_div(line_time, a);
1961 	bpp.full = dfixed_const(wm->bytes_per_pixel);
1962 	src_width.full = dfixed_const(wm->src_width);
1963 	bandwidth.full = dfixed_mul(src_width, bpp);
1964 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1965 	bandwidth.full = dfixed_div(bandwidth, line_time);
1966 
1967 	return dfixed_trunc(bandwidth);
1968 }
1969 
/* Worst-case latency (in ns) that the display must tolerate for this
 * mode, padded with any extra time needed to fill a line into the line
 * buffer.  Returns 0 when no heads are active.
 */
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* worst-case time for a chunk of data to come back from memory */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* time spent servicing the other heads' outstanding requests */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* downscaling/multi-tap filtering/interlacing can require up to 4
	 * source lines to be fetched per destination line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = dmif_size / ((mc_latency + 512) / disp_clk) */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = bytes the display pipe consumes per unit time:
	 * (disp_clk / 1000) * bytes_per_pixel */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill bandwidth is the smaller of supply and demand */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* line_fill_time = line bytes / (lb_fill_bw / 1000) */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if filling a line takes longer than the active period, add the
	 * overrun to the latency the hardware must hide */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2032 
2033 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2034 {
2035 	if (dce6_average_bandwidth(wm) <=
2036 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2037 		return true;
2038 	else
2039 		return false;
2040 };
2041 
2042 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2043 {
2044 	if (dce6_average_bandwidth(wm) <=
2045 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2046 		return true;
2047 	else
2048 		return false;
2049 };
2050 
2051 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2052 {
2053 	u32 lb_partitions = wm->lb_size / wm->src_width;
2054 	u32 line_time = wm->active_time + wm->blank_time;
2055 	u32 latency_tolerant_lines;
2056 	u32 latency_hiding;
2057 	fixed20_12 a;
2058 
2059 	a.full = dfixed_const(1);
2060 	if (wm->vsc.full > a.full)
2061 		latency_tolerant_lines = 1;
2062 	else {
2063 		if (lb_partitions <= (wm->vtaps + 1))
2064 			latency_tolerant_lines = 1;
2065 		else
2066 			latency_tolerant_lines = 2;
2067 	}
2068 
2069 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2070 
2071 	if (dce6_latency_watermark(wm) <= latency_hiding)
2072 		return true;
2073 	else
2074 		return false;
2075 }
2076 
/* Compute and program the DCE6 latency watermarks and priority marks
 * for one CRTC.  Watermark A is calculated at the high (performance)
 * clocks, watermark B at the low (DPM minimum) clocks; both are written
 * to the hardware along with per-CRTC request priority marks.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* pixel period in ns; line time clamped to the 16-bit
		 * LATENCY_HIGH_WATERMARK field */
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a * (clock/1000) * hsc / 1000,
		 * expressed in units of 16 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same computation with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2236 
2237 void dce6_bandwidth_update(struct radeon_device *rdev)
2238 {
2239 	struct drm_display_mode *mode0 = NULL;
2240 	struct drm_display_mode *mode1 = NULL;
2241 	u32 num_heads = 0, lb_size;
2242 	int i;
2243 
2244 	radeon_update_display_priority(rdev);
2245 
2246 	for (i = 0; i < rdev->num_crtc; i++) {
2247 		if (rdev->mode_info.crtcs[i]->base.enabled)
2248 			num_heads++;
2249 	}
2250 	for (i = 0; i < rdev->num_crtc; i += 2) {
2251 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2252 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2253 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2254 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2255 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2256 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2257 	}
2258 }
2259 
2260 /*
2261  * Core functions
2262  */
2263 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2264 {
2265 	const u32 num_tile_mode_states = 32;
2266 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2267 
2268 	switch (rdev->config.si.mem_row_size_in_kb) {
2269 	case 1:
2270 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2271 		break;
2272 	case 2:
2273 	default:
2274 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2275 		break;
2276 	case 4:
2277 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2278 		break;
2279 	}
2280 
2281 	if ((rdev->family == CHIP_TAHITI) ||
2282 	    (rdev->family == CHIP_PITCAIRN)) {
2283 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2284 			switch (reg_offset) {
2285 			case 0:  /* non-AA compressed depth or any compressed stencil */
2286 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2287 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2288 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2289 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2290 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2291 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2292 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2293 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2294 				break;
2295 			case 1:  /* 2xAA/4xAA compressed depth only */
2296 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2297 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2298 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2299 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2300 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2301 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2302 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2303 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2304 				break;
2305 			case 2:  /* 8xAA compressed depth only */
2306 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2308 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2309 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2310 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2311 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2312 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2313 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2314 				break;
2315 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2316 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2318 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2319 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2320 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2321 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2322 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2323 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2324 				break;
2325 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2326 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2327 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2328 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2329 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2330 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2331 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2333 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2334 				break;
2335 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2336 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2338 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2339 						 TILE_SPLIT(split_equal_to_row_size) |
2340 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2341 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2342 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2343 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2344 				break;
2345 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2346 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2347 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2348 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2349 						 TILE_SPLIT(split_equal_to_row_size) |
2350 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2351 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2353 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2354 				break;
2355 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2356 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2357 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2358 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2359 						 TILE_SPLIT(split_equal_to_row_size) |
2360 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2361 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2363 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2364 				break;
2365 			case 8:  /* 1D and 1D Array Surfaces */
2366 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2367 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2368 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2369 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2370 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2371 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2374 				break;
2375 			case 9:  /* Displayable maps. */
2376 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2377 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2378 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2379 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2380 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2381 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2382 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2383 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2384 				break;
2385 			case 10:  /* Display 8bpp. */
2386 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2388 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2389 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2390 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2391 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2394 				break;
2395 			case 11:  /* Display 16bpp. */
2396 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2397 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2398 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2399 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2400 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2401 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2402 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2403 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2404 				break;
2405 			case 12:  /* Display 32bpp. */
2406 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2408 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2409 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2410 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2411 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2412 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2413 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2414 				break;
2415 			case 13:  /* Thin. */
2416 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2417 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2418 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2419 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2420 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2421 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2423 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2424 				break;
2425 			case 14:  /* Thin 8 bpp. */
2426 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2427 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2428 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2429 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2430 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2431 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2432 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2433 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2434 				break;
2435 			case 15:  /* Thin 16 bpp. */
2436 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2438 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2439 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2440 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2441 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2443 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2444 				break;
2445 			case 16:  /* Thin 32 bpp. */
2446 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2447 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2448 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2449 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2450 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2451 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2452 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2453 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2454 				break;
2455 			case 17:  /* Thin 64 bpp. */
2456 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2457 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2458 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2459 						 TILE_SPLIT(split_equal_to_row_size) |
2460 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2461 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2464 				break;
2465 			case 21:  /* 8 bpp PRT. */
2466 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2467 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2468 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2469 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2470 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2471 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2472 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2473 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2474 				break;
2475 			case 22:  /* 16 bpp PRT */
2476 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2477 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2478 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2479 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2480 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2481 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2483 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2484 				break;
2485 			case 23:  /* 32 bpp PRT */
2486 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2487 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2488 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2489 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2490 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2491 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2493 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2494 				break;
2495 			case 24:  /* 64 bpp PRT */
2496 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2497 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2498 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2499 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2500 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2501 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2503 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2504 				break;
2505 			case 25:  /* 128 bpp PRT */
2506 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2507 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2508 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2509 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2510 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2511 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2513 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2514 				break;
2515 			default:
2516 				gb_tile_moden = 0;
2517 				break;
2518 			}
2519 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2520 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2521 		}
2522 	} else if ((rdev->family == CHIP_VERDE) ||
2523 		   (rdev->family == CHIP_OLAND) ||
2524 		   (rdev->family == CHIP_HAINAN)) {
2525 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2526 			switch (reg_offset) {
2527 			case 0:  /* non-AA compressed depth or any compressed stencil */
2528 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2530 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2531 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2532 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2533 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2535 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2536 				break;
2537 			case 1:  /* 2xAA/4xAA compressed depth only */
2538 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2539 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2540 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2541 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2542 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2543 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2545 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2546 				break;
2547 			case 2:  /* 8xAA compressed depth only */
2548 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2550 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2551 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2552 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2553 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2555 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2556 				break;
2557 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2558 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2559 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2560 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2561 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2562 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2563 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2565 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2566 				break;
2567 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2568 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2569 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2570 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2571 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2572 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2573 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2575 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2576 				break;
2577 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2578 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2580 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2581 						 TILE_SPLIT(split_equal_to_row_size) |
2582 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2583 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2585 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2586 				break;
2587 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2588 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2590 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2591 						 TILE_SPLIT(split_equal_to_row_size) |
2592 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2593 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2595 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2596 				break;
2597 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2598 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2599 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2600 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2601 						 TILE_SPLIT(split_equal_to_row_size) |
2602 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2603 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2605 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2606 				break;
2607 			case 8:  /* 1D and 1D Array Surfaces */
2608 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2609 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2610 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2611 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2612 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2613 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2614 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2615 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2616 				break;
2617 			case 9:  /* Displayable maps. */
2618 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2619 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2620 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2621 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2622 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2623 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2625 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2626 				break;
2627 			case 10:  /* Display 8bpp. */
2628 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2629 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2630 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2631 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2632 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2633 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2635 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2636 				break;
2637 			case 11:  /* Display 16bpp. */
2638 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2639 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2640 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2641 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2642 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2643 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2645 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2646 				break;
2647 			case 12:  /* Display 32bpp. */
2648 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2649 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2650 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2651 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2652 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2653 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2654 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2655 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2656 				break;
2657 			case 13:  /* Thin. */
2658 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2659 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2660 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2661 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2662 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2663 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2665 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2666 				break;
2667 			case 14:  /* Thin 8 bpp. */
2668 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2670 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2672 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2673 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2675 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2676 				break;
2677 			case 15:  /* Thin 16 bpp. */
2678 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2679 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2680 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2681 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2682 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2683 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2684 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2685 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2686 				break;
2687 			case 16:  /* Thin 32 bpp. */
2688 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2690 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2691 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2692 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2693 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2694 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2695 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2696 				break;
2697 			case 17:  /* Thin 64 bpp. */
2698 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2700 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2701 						 TILE_SPLIT(split_equal_to_row_size) |
2702 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2703 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2705 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2706 				break;
2707 			case 21:  /* 8 bpp PRT. */
2708 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2710 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2711 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2712 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2713 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2714 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2715 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2716 				break;
2717 			case 22:  /* 16 bpp PRT */
2718 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2719 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2720 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2721 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2722 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2723 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2724 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2725 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2726 				break;
2727 			case 23:  /* 32 bpp PRT */
2728 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2729 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2730 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2731 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2732 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2733 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2734 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2735 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2736 				break;
2737 			case 24:  /* 64 bpp PRT */
2738 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2739 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2740 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2741 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2742 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2743 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2745 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2746 				break;
2747 			case 25:  /* 128 bpp PRT */
2748 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2749 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2750 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2751 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2752 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2753 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2754 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2755 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2756 				break;
2757 			default:
2758 				gb_tile_moden = 0;
2759 				break;
2760 			}
2761 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2762 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2763 		}
2764 	} else
2765 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2766 }
2767 
2768 static void si_select_se_sh(struct radeon_device *rdev,
2769 			    u32 se_num, u32 sh_num)
2770 {
2771 	u32 data = INSTANCE_BROADCAST_WRITES;
2772 
2773 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2774 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2775 	else if (se_num == 0xffffffff)
2776 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2777 	else if (sh_num == 0xffffffff)
2778 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2779 	else
2780 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2781 	WREG32(GRBM_GFX_INDEX, data);
2782 }
2783 
2784 static u32 si_create_bitmask(u32 bit_width)
2785 {
2786 	u32 i, mask = 0;
2787 
2788 	for (i = 0; i < bit_width; i++) {
2789 		mask <<= 1;
2790 		mask |= 1;
2791 	}
2792 	return mask;
2793 }
2794 
2795 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2796 {
2797 	u32 data, mask;
2798 
2799 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2800 	if (data & 1)
2801 		data &= INACTIVE_CUS_MASK;
2802 	else
2803 		data = 0;
2804 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2805 
2806 	data >>= INACTIVE_CUS_SHIFT;
2807 
2808 	mask = si_create_bitmask(cu_per_sh);
2809 
2810 	return ~data & mask;
2811 }
2812 
/* For every SE/SH pair, read SPI_STATIC_THREAD_MGMT_3, find one active CU
 * in the enabled-CU bitmap and clear its bit in the register (reserving
 * that CU), then restore broadcast selection.
 */
static void si_setup_spi(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 cu_per_sh)
{
	int i, j, k;
	u32 data, mask, active_cu;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			/* Direct register accesses at this SE/SH pair. */
			si_select_se_sh(rdev, i, j);
			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
			active_cu = si_get_cu_enabled(rdev, cu_per_sh);

			mask = 1;
			for (k = 0; k < 16; k++) {
				/* NOTE(review): "mask <<= k" probes bits
				 * 0, 1, 3, 6, 10, 15 (triangular numbers),
				 * not every bit.  This matches upstream
				 * radeon, but looks as if "mask <<= 1" was
				 * intended -- confirm before changing. */
				mask <<= k;
				if (active_cu & mask) {
					/* Clear the first active CU found,
					 * write it back, and stop. */
					data &= ~mask;
					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
					break;
				}
			}
		}
	}
	/* Restore broadcast so later accesses reach all SEs/SHs. */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2839 
2840 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2841 			      u32 max_rb_num_per_se,
2842 			      u32 sh_per_se)
2843 {
2844 	u32 data, mask;
2845 
2846 	data = RREG32(CC_RB_BACKEND_DISABLE);
2847 	if (data & 1)
2848 		data &= BACKEND_DISABLE_MASK;
2849 	else
2850 		data = 0;
2851 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2852 
2853 	data >>= BACKEND_DISABLE_SHIFT;
2854 
2855 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
2856 
2857 	return data & mask;
2858 }
2859 
/* Derive the set of enabled render backends (RBs) from the per-SE/SH
 * harvest fuses, cache it in rdev->config.si.backend_enable_mask, and
 * program a PA_SC_RASTER_CONFIG RB mapping for each shader engine.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Gather each SH's disabled-RB bits into one packed bitmap,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per SH. */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert: an RB is enabled iff it is not marked disabled. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	rdev->config.si.backend_enable_mask = enabled_rbs;

	/* Build each SE's raster config from the 2-bit enabled-RB pattern
	 * of every SH, consuming enabled_rbs two bits at a time. */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	/* Back to broadcast for subsequent register accesses. */
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2909 
/* One-time graphics engine initialization: fill in the per-ASIC topology
 * limits, program the global address/tiling configuration, set up the
 * tiling mode table, render backends and SPI CU reservations, and apply
 * HW defaults for the 3D engine.  Order of the register writes follows
 * the hardware bring-up sequence and must be preserved.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-ASIC shader/backend topology and golden GB_ADDR_CONFIG.
	 * Note CHIP_VERDE shares the default: branch, so unknown families
	 * get VERDE settings. */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but never used below --
	 * confirm whether the read itself is required. */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* Derive DRAM row size (in KB) from the number of columns,
	 * clamped to 4 KB. */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* Broadcast the address config to every client that needs it. */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);

	/* Count active CUs across all SEs/SHs for reporting. */
	rdev->config.si.active_cus = 0;
	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
			rdev->config.si.active_cus +=
				hweight32(si_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* Read-modify-write with no modification: re-latch the value. */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* Reset all CB perf counter selects. */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* Re-latch the host path control value. */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* Let the writes above settle before continuing. */
	udelay(50);
}
3173 
3174 /*
3175  * GPU scratch registers helpers function.
3176  */
3177 static void si_scratch_init(struct radeon_device *rdev)
3178 {
3179 	int i;
3180 
3181 	rdev->scratch.num_reg = 7;
3182 	rdev->scratch.reg_base = SCRATCH_REG0;
3183 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3184 		rdev->scratch.free[i] = true;
3185 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3186 	}
3187 }
3188 
/* Emit a fence on the given CP ring: flush the GPU read caches over the
 * GART, then issue an EVENT_WRITE_EOP that writes fence->seq to the
 * fence address and raises an interrupt once prior work completes.
 * Caller must have reserved ring space; the packet layout/dword counts
 * must not change without updating callers' space reservations.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* sync full address range */
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, lower_32_bits(addr));
	/* DATA_SEL(1): write 32-bit fence value; INT_SEL(2): interrupt. */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3215 
3216 /*
3217  * IB stuff
3218  */
/* Schedule an indirect buffer (IB) on a CP ring.  For const IBs a
 * SWITCH_BUFFER is emitted first; for normal IBs the next read pointer
 * is recorded (via scratch reg or writeback) so GPU hangs can be
 * diagnosed, and the read caches are flushed for the IB's VM after the
 * IB packet.  The "+ 3 + 4 + 8" / "+ 5 + 4 + 8" offsets count the dwords
 * emitted below (rptr record + IB packet + cache flush) and must be kept
 * in sync with the packet layout.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* Record where the rptr will be after this IB in a
			 * scratch register (3 dwords emitted here). */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* Same, but via a writeback memory location
			 * (5 dwords emitted here). */
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8)); /* write confirm */
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* The IB packet itself: address, size and VM id. */
	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3275 
3276 /*
3277  * CP.
3278  */
3279 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3280 {
3281 	if (enable)
3282 		WREG32(CP_ME_CNTL, 0);
3283 	else {
3284 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3285 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3286 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3287 		WREG32(SCRATCH_UMSK, 0);
3288 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3289 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3290 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3291 	}
3292 	udelay(50);
3293 }
3294 
3295 static int si_cp_load_microcode(struct radeon_device *rdev)
3296 {
3297 	const __be32 *fw_data;
3298 	int i;
3299 
3300 	if (!rdev->me_fw || !rdev->pfp_fw)
3301 		return -EINVAL;
3302 
3303 	si_cp_enable(rdev, false);
3304 
3305 	/* PFP */
3306 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3307 	WREG32(CP_PFP_UCODE_ADDR, 0);
3308 	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3309 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3310 	WREG32(CP_PFP_UCODE_ADDR, 0);
3311 
3312 	/* CE */
3313 	fw_data = (const __be32 *)rdev->ce_fw->data;
3314 	WREG32(CP_CE_UCODE_ADDR, 0);
3315 	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3316 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3317 	WREG32(CP_CE_UCODE_ADDR, 0);
3318 
3319 	/* ME */
3320 	fw_data = (const __be32 *)rdev->me_fw->data;
3321 	WREG32(CP_ME_RAM_WADDR, 0);
3322 	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3323 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3324 	WREG32(CP_ME_RAM_WADDR, 0);
3325 
3326 	WREG32(CP_PFP_UCODE_ADDR, 0);
3327 	WREG32(CP_CE_UCODE_ADDR, 0);
3328 	WREG32(CP_ME_RAM_WADDR, 0);
3329 	WREG32(CP_ME_RAM_RADDR, 0);
3330 	return 0;
3331 }
3332 
3333 static int si_cp_start(struct radeon_device *rdev)
3334 {
3335 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3336 	int r, i;
3337 
3338 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3339 	if (r) {
3340 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3341 		return r;
3342 	}
3343 	/* init the CP */
3344 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3345 	radeon_ring_write(ring, 0x1);
3346 	radeon_ring_write(ring, 0x0);
3347 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3348 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3349 	radeon_ring_write(ring, 0);
3350 	radeon_ring_write(ring, 0);
3351 
3352 	/* init the CE partitions */
3353 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3354 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3355 	radeon_ring_write(ring, 0xc000);
3356 	radeon_ring_write(ring, 0xe000);
3357 	radeon_ring_unlock_commit(rdev, ring, false);
3358 
3359 	si_cp_enable(rdev, true);
3360 
3361 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3362 	if (r) {
3363 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3364 		return r;
3365 	}
3366 
3367 	/* setup clear context state */
3368 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3369 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3370 
3371 	for (i = 0; i < si_default_size; i++)
3372 		radeon_ring_write(ring, si_default_state[i]);
3373 
3374 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3375 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3376 
3377 	/* set clear context state */
3378 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3379 	radeon_ring_write(ring, 0);
3380 
3381 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3382 	radeon_ring_write(ring, 0x00000316);
3383 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3384 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3385 
3386 	radeon_ring_unlock_commit(rdev, ring, false);
3387 
3388 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3389 		ring = &rdev->ring[i];
3390 		r = radeon_ring_lock(rdev, ring, 2);
3391 
3392 		/* clear the compute context state */
3393 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3394 		radeon_ring_write(ring, 0);
3395 
3396 		radeon_ring_unlock_commit(rdev, ring, false);
3397 	}
3398 
3399 	return 0;
3400 }
3401 
3402 static void si_cp_fini(struct radeon_device *rdev)
3403 {
3404 	struct radeon_ring *ring;
3405 	si_cp_enable(rdev, false);
3406 
3407 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3408 	radeon_ring_fini(rdev, ring);
3409 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3410 
3411 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3412 	radeon_ring_fini(rdev, ring);
3413 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3414 
3415 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3416 	radeon_ring_fini(rdev, ring);
3417 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3418 }
3419 
/* Bring up the three CP ring buffers (gfx + two compute), program their
 * sizes, read/write pointers and writeback addresses, start the CP and
 * ring-test each ring.  Only a gfx ring-test failure is fatal; compute
 * ring failures just leave those rings marked not ready.
 *
 * Returns 0 on success, negative error code if the gfx ring test fails.
 */
static int si_cp_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	int r;

	si_enable_gui_idle_interrupt(rdev, false);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, 0);
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	if (rdev->wb.enabled)
		WREG32(SCRATCH_UMSK, 0xff);
	else {
		tmp |= RB_NO_UPDATE;
		WREG32(SCRATCH_UMSK, 0);
	}

	/* Let the rptr-write-enable latch before restoring CNTL. */
	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

	/* ring1  - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB1_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB1_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB1_CNTL, tmp);

	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

	/* ring2 - compute only */
	/* Set ring buffer size */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB2_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB2_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);

	mdelay(1);
	WREG32(CP_RB2_CNTL, tmp);

	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

	/* start the rings */
	si_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		/* gfx ring failure is fatal: nothing can work without it. */
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}
	/* Compute ring failures are non-fatal; just mark them unusable. */
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	}
	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
	if (r) {
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}

	si_enable_gui_idle_interrupt(rdev, true);

	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
3545 
/**
 * si_gpu_check_soft_reset - read the busy/hung state of the GPU blocks
 *
 * @rdev: radeon_device pointer
 *
 * Polls the GRBM/SRBM/DMA/VM status registers and translates any busy
 * bits into a RADEON_RESET_* mask describing which engines would need a
 * soft reset.  A busy MC is deliberately dropped from the mask (it is
 * most likely just busy, not hung).  Returns 0 when everything is idle.
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3626 
/**
 * si_gpu_soft_reset - soft reset the requested GPU blocks
 *
 * @rdev: radeon_device pointer
 * @reset_mask: RADEON_RESET_* mask of blocks to reset (as returned by
 *              si_gpu_check_soft_reset())
 *
 * Halts the CP, RLC and the requested DMA engines, stops MC access,
 * then pulses the GRBM/SRBM soft-reset bits corresponding to
 * @reset_mask and restores the MC.  No-op when @reset_mask is 0.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable PG/CG */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* stop the rlc */
	si_rlc_stop(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	/* give the engines a moment to drain before stopping the MC */
	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* mask into GRBM/SRBM soft-reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	/* assert the reset bits, wait, then deassert; the read-backs
	 * after each write flush the write before the delay */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3758 
/**
 * si_set_clk_bypass_mode - switch sclk/mclk to their bypass sources
 *
 * @rdev: radeon_device pointer
 *
 * Puts the SPLL into bypass, requests the sclk mux change and waits
 * (up to rdev->usec_timeout us) for the PLL to acknowledge, then
 * deselects the MPLL as the mclk source.  Used on the way into a PCI
 * config reset (see si_gpu_pci_config_reset()).
 */
static void si_set_clk_bypass_mode(struct radeon_device *rdev)
{
	u32 tmp, i;

	/* route sclk around the SPLL */
	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_BYPASS_EN;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	/* request the control change */
	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp |= SPLL_CTLREQ_CHG;
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	/* wait for the SPLL to acknowledge the change */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
			break;
		udelay(1);
	}

	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);

	/* take mclk off the MPLL */
	tmp = RREG32(MPLL_CNTL_MODE);
	tmp &= ~MPLL_MCLK_SEL;
	WREG32(MPLL_CNTL_MODE, tmp);
}
3785 
/**
 * si_spll_powerdown - put the SPLL into reset/sleep
 *
 * @rdev: radeon_device pointer
 *
 * Takes software control of the SPLL, asserts its reset and sleep
 * bits, then hands control back to the hardware.  Called after the
 * clocks have been switched to bypass mode (see
 * si_gpu_pci_config_reset()).
 */
static void si_spll_powerdown(struct radeon_device *rdev)
{
	u32 tmp;

	/* software takes control of the SPLL */
	tmp = RREG32(SPLL_CNTL_MODE);
	tmp |= SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_RESET;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	tmp = RREG32(CG_SPLL_FUNC_CNTL);
	tmp |= SPLL_SLEEP;
	WREG32(CG_SPLL_FUNC_CNTL, tmp);

	/* return control to the hardware */
	tmp = RREG32(SPLL_CNTL_MODE);
	tmp &= ~SPLL_SW_DIR_CONTROL;
	WREG32(SPLL_CNTL_MODE, tmp);
}
3806 
/**
 * si_gpu_pci_config_reset - reset the asic via the PCI config space
 *
 * @rdev: radeon_device pointer
 *
 * Heavier-weight fallback than the GRBM/SRBM soft reset: halts the CP,
 * DMA and RLC engines, stops MC access, switches the clocks to bypass,
 * powers down the SPLL, disables bus mastering and resets the chip
 * through PCI config space, then spins (up to rdev->usec_timeout us)
 * until CONFIG_MEMSIZE reads back something other than all-ones,
 * indicating the asic is out of reset.
 */
static void si_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	si_fini_pg(rdev);
	si_fini_cg(rdev);

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
	/* dma0 */
	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	/* dma1 */
	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
	tmp &= ~DMA_RB_ENABLE;
	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	si_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* set mclk/sclk to bypass */
	si_set_clk_bypass_mode(rdev);
	/* powerdown spll */
	si_spll_powerdown(rdev);
	/* disable BM */
	pci_disable_busmaster(rdev->pdev->dev);
	/* reset */
	radeon_pci_config_reset(rdev);
	/* wait for asic to come out of reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}
}
3858 
/**
 * si_asic_reset - attempt to reset a hung asic
 *
 * @rdev: radeon_device pointer
 *
 * Tries a GRBM/SRBM soft reset first; if blocks still report busy
 * afterwards and the radeon_hard_reset module parameter is set, falls
 * back to a PCI config reset.  The BIOS scratch "engine hung" flag is
 * raised while a reset is pending and cleared once the GPU reads idle.
 * Always returns 0.
 */
int si_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	si_gpu_soft_reset(rdev, reset_mask);

	reset_mask = si_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		si_gpu_pci_config_reset(rdev);

	reset_mask = si_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
3884 
3885 /**
3886  * si_gfx_is_lockup - Check if the GFX engine is locked up
3887  *
3888  * @rdev: radeon_device pointer
3889  * @ring: radeon_ring structure holding ring information
3890  *
3891  * Check if the GFX engine is locked up.
3892  * Returns true if the engine appears to be locked up, false if not.
3893  */
3894 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3895 {
3896 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3897 
3898 	if (!(reset_mask & (RADEON_RESET_GFX |
3899 			    RADEON_RESET_COMPUTE |
3900 			    RADEON_RESET_CP))) {
3901 		radeon_ring_lockup_update(rdev, ring);
3902 		return false;
3903 	}
3904 	return radeon_ring_test_lockup(rdev, ring);
3905 }
3906 
3907 /* MC */
/**
 * si_mc_program - program the memory controller location registers
 *
 * @rdev: radeon_device pointer
 *
 * Clears the HDP tiling registers, then - with the MC stopped -
 * programs the system/VRAM aperture registers and MC_VM_FB_LOCATION
 * from rdev->mc, disables the AGP aperture, and (on boards with
 * display hardware) locks out VGA access to the framebuffer.
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: top 16 bits = end, bottom 16 bits = start, in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* disable the AGP aperture (BOT > TOP) */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3958 
/**
 * si_vram_gtt_location - place VRAM and GTT in the GPU address space
 *
 * @rdev: radeon_device pointer
 * @mc: memory controller structure holding the sizes to place
 *
 * Caps usable VRAM at 0xFFC0000000 to leave room for at least 1GB of
 * GTT, then lets the core helpers pick the VRAM and GTT base addresses.
 * NOTE(review): the VRAM placement uses &rdev->mc while the cap and the
 * GTT placement use @mc - harmless as long as callers pass &rdev->mc,
 * but worth confirming before calling with any other struct.
 */
void si_vram_gtt_location(struct radeon_device *rdev,
			  struct radeon_mc *mc)
{
	if (mc->mc_vram_size > 0xFFC0000000ULL) {
		/* leave room for at least 1024M GTT */
		dev_warn(rdev->dev, "limiting VRAM\n");
		mc->real_vram_size = 0xFFC0000000ULL;
		mc->mc_vram_size = 0xFFC0000000ULL;
	}
	radeon_vram_location(rdev, &rdev->mc, 0);
	rdev->mc.gtt_base_align = 0;
	radeon_gtt_location(rdev, mc);
}
3972 
3973 static int si_mc_init(struct radeon_device *rdev)
3974 {
3975 	u32 tmp;
3976 	int chansize, numchan;
3977 
3978 	/* Get VRAM informations */
3979 	rdev->mc.vram_is_ddr = true;
3980 	tmp = RREG32(MC_ARB_RAMCFG);
3981 	if (tmp & CHANSIZE_OVERRIDE) {
3982 		chansize = 16;
3983 	} else if (tmp & CHANSIZE_MASK) {
3984 		chansize = 64;
3985 	} else {
3986 		chansize = 32;
3987 	}
3988 	tmp = RREG32(MC_SHARED_CHMAP);
3989 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3990 	case 0:
3991 	default:
3992 		numchan = 1;
3993 		break;
3994 	case 1:
3995 		numchan = 2;
3996 		break;
3997 	case 2:
3998 		numchan = 4;
3999 		break;
4000 	case 3:
4001 		numchan = 8;
4002 		break;
4003 	case 4:
4004 		numchan = 3;
4005 		break;
4006 	case 5:
4007 		numchan = 6;
4008 		break;
4009 	case 6:
4010 		numchan = 10;
4011 		break;
4012 	case 7:
4013 		numchan = 12;
4014 		break;
4015 	case 8:
4016 		numchan = 16;
4017 		break;
4018 	}
4019 	rdev->mc.vram_width = numchan * chansize;
4020 	/* Could aper size report 0 ? */
4021 	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
4022 	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
4023 	/* size in MB on si */
4024 	tmp = RREG32(CONFIG_MEMSIZE);
4025 	/* some boards may have garbage in the upper 16 bits */
4026 	if (tmp & 0xffff0000) {
4027 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4028 		if (tmp & 0xffff)
4029 			tmp &= 0xffff;
4030 	}
4031 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4032 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4033 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4034 	si_vram_gtt_location(rdev, &rdev->mc);
4035 	radeon_update_bandwidth_info(rdev);
4036 
4037 	return 0;
4038 }
4039 
4040 /*
4041  * GART
4042  */
/**
 * si_pcie_gart_tlb_flush - flush the GART TLB
 *
 * @rdev: radeon_device pointer
 *
 * Flushes the HDP cache and invalidates the VM context 0 TLB so that
 * subsequent GPU accesses see updated GART page table entries.
 */
void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
4051 
/**
 * si_pcie_gart_enable - program and enable the GART MMU
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB and L2 cache,
 * sets up VM context 0 to cover the GTT aperture and contexts 1-15 for
 * per-process VMs (restoring their saved page table bases), then
 * flushes the TLB and marks the GART ready.
 * Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faulting accesses in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
4139 
/**
 * si_pcie_gart_disable - disable the GART MMU
 *
 * @rdev: radeon_device pointer
 *
 * Saves the per-VM page table base addresses (contexts 1-15) so they
 * can be restored by the next si_pcie_gart_enable(), disables all VM
 * contexts and the L1 TLB / L2 cache, and unpins the GART table.
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
4169 
/**
 * si_pcie_gart_fini - tear down the GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART hardware, frees the page table backing store and
 * releases the GART bookkeeping.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4176 
4177 /* vm parser */
4178 static bool si_vm_reg_valid(u32 reg)
4179 {
4180 	/* context regs are fine */
4181 	if (reg >= 0x28000)
4182 		return true;
4183 
4184 	/* check config regs */
4185 	switch (reg) {
4186 	case GRBM_GFX_INDEX:
4187 	case CP_STRMOUT_CNTL:
4188 	case VGT_VTX_VECT_EJECT_REG:
4189 	case VGT_CACHE_INVALIDATION:
4190 	case VGT_ESGS_RING_SIZE:
4191 	case VGT_GSVS_RING_SIZE:
4192 	case VGT_GS_VERTEX_REUSE:
4193 	case VGT_PRIMITIVE_TYPE:
4194 	case VGT_INDEX_TYPE:
4195 	case VGT_NUM_INDICES:
4196 	case VGT_NUM_INSTANCES:
4197 	case VGT_TF_RING_SIZE:
4198 	case VGT_HS_OFFCHIP_PARAM:
4199 	case VGT_TF_MEMORY_BASE:
4200 	case PA_CL_ENHANCE:
4201 	case PA_SU_LINE_STIPPLE_VALUE:
4202 	case PA_SC_LINE_STIPPLE_STATE:
4203 	case PA_SC_ENHANCE:
4204 	case SQC_CACHES:
4205 	case SPI_STATIC_THREAD_MGMT_1:
4206 	case SPI_STATIC_THREAD_MGMT_2:
4207 	case SPI_STATIC_THREAD_MGMT_3:
4208 	case SPI_PS_MAX_WAVE_ID:
4209 	case SPI_CONFIG_CNTL:
4210 	case SPI_CONFIG_CNTL_1:
4211 	case TA_CNTL_AUX:
4212 		return true;
4213 	default:
4214 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4215 		return false;
4216 	}
4217 }
4218 
4219 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4220 				  u32 *ib, struct radeon_cs_packet *pkt)
4221 {
4222 	switch (pkt->opcode) {
4223 	case PACKET3_NOP:
4224 	case PACKET3_SET_BASE:
4225 	case PACKET3_SET_CE_DE_COUNTERS:
4226 	case PACKET3_LOAD_CONST_RAM:
4227 	case PACKET3_WRITE_CONST_RAM:
4228 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4229 	case PACKET3_DUMP_CONST_RAM:
4230 	case PACKET3_INCREMENT_CE_COUNTER:
4231 	case PACKET3_WAIT_ON_DE_COUNTER:
4232 	case PACKET3_CE_WRITE:
4233 		break;
4234 	default:
4235 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4236 		return -EINVAL;
4237 	}
4238 	return 0;
4239 }
4240 
4241 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4242 {
4243 	u32 start_reg, reg, i;
4244 	u32 command = ib[idx + 4];
4245 	u32 info = ib[idx + 1];
4246 	u32 idx_value = ib[idx];
4247 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4248 		/* src address space is register */
4249 		if (((info & 0x60000000) >> 29) == 0) {
4250 			start_reg = idx_value << 2;
4251 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4252 				reg = start_reg;
4253 				if (!si_vm_reg_valid(reg)) {
4254 					DRM_ERROR("CP DMA Bad SRC register\n");
4255 					return -EINVAL;
4256 				}
4257 			} else {
4258 				for (i = 0; i < (command & 0x1fffff); i++) {
4259 					reg = start_reg + (4 * i);
4260 					if (!si_vm_reg_valid(reg)) {
4261 						DRM_ERROR("CP DMA Bad SRC register\n");
4262 						return -EINVAL;
4263 					}
4264 				}
4265 			}
4266 		}
4267 	}
4268 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4269 		/* dst address space is register */
4270 		if (((info & 0x00300000) >> 20) == 0) {
4271 			start_reg = ib[idx + 2];
4272 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4273 				reg = start_reg;
4274 				if (!si_vm_reg_valid(reg)) {
4275 					DRM_ERROR("CP DMA Bad DST register\n");
4276 					return -EINVAL;
4277 				}
4278 			} else {
4279 				for (i = 0; i < (command & 0x1fffff); i++) {
4280 					reg = start_reg + (4 * i);
4281 				if (!si_vm_reg_valid(reg)) {
4282 						DRM_ERROR("CP DMA Bad DST register\n");
4283 						return -EINVAL;
4284 					}
4285 				}
4286 			}
4287 		}
4288 	}
4289 	return 0;
4290 }
4291 
/**
 * si_vm_packet3_gfx_check - validate a PACKET3 from a gfx ring VM IB
 *
 * @rdev: radeon_device pointer
 * @ib: IB dwords
 * @pkt: decoded packet header
 *
 * Whitelists the PACKET3 opcodes allowed on the gfx ring.  For the
 * opcodes that can target registers (COPY_DATA, WRITE_DATA, COND_WRITE,
 * COPY_DW, SET_CONFIG_REG, CP_DMA), every destination register is
 * checked with si_vm_reg_valid().  Returns 0 if the packet is allowed,
 * -EINVAL otherwise.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes that need no further checking */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dest-select field 0 targets a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-register mode: all dwords go to start_reg */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4409 
/**
 * si_vm_packet3_compute_check - validate a PACKET3 from a compute VM IB
 *
 * @rdev: radeon_device pointer
 * @ib: IB dwords
 * @pkt: decoded packet header
 *
 * Like si_vm_packet3_gfx_check() but with the smaller opcode whitelist
 * allowed on the compute rings (no draw/index opcodes).  Register
 * targets are checked with si_vm_reg_valid().  Returns 0 if the packet
 * is allowed, -EINVAL otherwise.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes that need no further checking */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* dest-select field 0 targets a register */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				/* one-register mode: all dwords go to start_reg */
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4497 
/**
 * si_ib_parse - validate an indirect buffer before VM submission
 *
 * @rdev: radeon_device pointer
 * @ib: IB to check
 *
 * Walks the IB packet by packet.  Type-0 packets are rejected outright,
 * type-2 padding is skipped, and type-3 packets are dispatched to the
 * CE, gfx or compute checker depending on the const-IB flag and the
 * target ring.  Stops at the first invalid packet.
 * Returns 0 on success, -EINVAL on a bad packet.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 packets are 1-dword padding */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* header dword + count+1 payload dwords */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
4549 
4550 /*
4551  * vm
4552  */
4553 int si_vm_init(struct radeon_device *rdev)
4554 {
4555 	/* number of VMs */
4556 	rdev->vm_manager.nvm = 16;
4557 	/* base offset of vram pages */
4558 	rdev->vm_manager.vram_base_offset = 0;
4559 
4560 	return 0;
4561 }
4562 
/**
 * si_vm_fini - tear down the SI VM manager
 *
 * @rdev: radeon_device pointer
 *
 * Nothing to release: si_vm_init() only sets bookkeeping fields.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4566 
4567 /**
4568  * si_vm_decode_fault - print human readable fault info
4569  *
4570  * @rdev: radeon_device pointer
4571  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4572  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4573  *
4574  * Print human readable fault information (SI).
4575  */
static void si_vm_decode_fault(struct radeon_device *rdev,
			       u32 status, u32 addr)
{
	/* Unpack the fault status register fields. */
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block;

	/* The MC client id -> hardware block mapping differs between
	 * Tahiti and the other SI parts, hence two separate tables.
	 * NOTE(review): the numeric case values are hardware-defined;
	 * keep them in sync with AMD's register documentation. */
	if (rdev->family == CHIP_TAHITI) {
		switch (mc_id) {
		case 160:
		case 144:
		case 96:
		case 80:
		case 224:
		case 208:
		case 32:
		case 16:
			block = "CB";
			break;
		case 161:
		case 145:
		case 97:
		case 81:
		case 225:
		case 209:
		case 33:
		case 17:
			block = "CB_FMASK";
			break;
		case 162:
		case 146:
		case 98:
		case 82:
		case 226:
		case 210:
		case 34:
		case 18:
			block = "CB_CMASK";
			break;
		case 163:
		case 147:
		case 99:
		case 83:
		case 227:
		case 211:
		case 35:
		case 19:
			block = "CB_IMMED";
			break;
		case 164:
		case 148:
		case 100:
		case 84:
		case 228:
		case 212:
		case 36:
		case 20:
			block = "DB";
			break;
		case 165:
		case 149:
		case 101:
		case 85:
		case 229:
		case 213:
		case 37:
		case 21:
			block = "DB_HTILE";
			break;
		case 167:
		case 151:
		case 103:
		case 87:
		case 231:
		case 215:
		case 39:
		case 23:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 64:
		case 8:
		case 4:
		case 0:
		case 136:
		case 132:
		case 128:
		case 200:
		case 196:
		case 192:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
		case 190:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	} else {
		/* Pitcairn/Verde/Oland/Hainan client id mapping. */
		switch (mc_id) {
		case 32:
		case 16:
		case 96:
		case 80:
		case 160:
		case 144:
		case 224:
		case 208:
			block = "CB";
			break;
		case 33:
		case 17:
		case 97:
		case 81:
		case 161:
		case 145:
		case 225:
		case 209:
			block = "CB_FMASK";
			break;
		case 34:
		case 18:
		case 98:
		case 82:
		case 162:
		case 146:
		case 226:
		case 210:
			block = "CB_CMASK";
			break;
		case 35:
		case 19:
		case 99:
		case 83:
		case 163:
		case 147:
		case 227:
		case 211:
			block = "CB_IMMED";
			break;
		case 36:
		case 20:
		case 100:
		case 84:
		case 164:
		case 148:
		case 228:
		case 212:
			block = "DB";
			break;
		case 37:
		case 21:
		case 101:
		case 85:
		case 165:
		case 149:
		case 229:
		case 213:
			block = "DB_HTILE";
			break;
		case 39:
		case 23:
		case 103:
		case 87:
		case 167:
		case 151:
		case 231:
		case 215:
			block = "DB_STEN";
			break;
		case 72:
		case 68:
		case 8:
		case 4:
		case 136:
		case 132:
		case 200:
		case 196:
			block = "TC";
			break;
		case 112:
		case 48:
			block = "CP";
			break;
		case 49:
		case 177:
		case 50:
		case 178:
			block = "SH";
			break;
		case 53:
			block = "VGT";
			break;
		case 117:
			block = "IH";
			break;
		case 51:
		case 115:
			block = "RLC";
			break;
		case 119:
		case 183:
			block = "DMA0";
			break;
		case 61:
			block = "DMA1";
			break;
		case 248:
		case 120:
			block = "HDP";
			break;
		default:
			block = "unknown";
			break;
		}
	}

	/* addr is a page number (from VM_CONTEXT1_PROTECTION_FAULT_ADDR),
	 * not a byte address. */
	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
4830 
/* Flush the TLB for the given VM via CP ring packets: program the VM's
 * page directory base, flush the HDP cache, request a TLB invalidate
 * for this VM id, then sync PFP to ME.  No-op when vm is NULL. */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));

	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4873 
4874 /*
4875  *  Power and clock gating
4876  */
/* Poll (up to rdev->usec_timeout microseconds each) until both RLC
 * serdes master busy registers read idle.  Times out silently. */
static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	int i;

	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
			break;
		udelay(1);
	}

	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
			break;
		udelay(1);
	}
}
4893 
/* Enable/disable the context busy/empty ("gui idle") interrupts on CP
 * ring 0.  When disabling, poll RLC_STAT until the GFX block reports
 * clocked + powered (with RLC idle and not in LS) before returning. */
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);
	u32 mask;
	int i;

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	if (!enable) {
		/* read a gfx register */
		tmp = RREG32(DB_DEPTH_INFO);

		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
				break;
			udelay(1);
		}
	}
}
4919 
/* Configure UVD dynamic clock mode (DCM).  sw_mode selects software
 * control via UVD_CGC_CTRL2 (dynamic OR/RR enables, divider id 7);
 * otherwise the hardware-controlled bits 0x7ffff800 are set instead. */
static void si_set_uvd_dcm(struct radeon_device *rdev,
			   bool sw_mode)
{
	u32 tmp, tmp2;

	tmp = RREG32(UVD_CGC_CTRL);
	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
	tmp |= DCM | CG_DT(1) | CLK_OD(4);

	if (sw_mode) {
		tmp &= ~0x7ffff800;
		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
	} else {
		tmp |= 0x7ffff800;
		tmp2 = 0;
	}

	WREG32(UVD_CGC_CTRL, tmp);
	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
}
4940 
/* Initialize UVD internal clock gating.  hw_mode is a hard-coded
 * compile-time toggle: the driver always uses hardware mode, so the
 * else branch (clearing DCM) is currently unreachable but kept for
 * debugging. */
void si_init_uvd_internal_cg(struct radeon_device *rdev)
{
	bool hw_mode = true;

	if (hw_mode) {
		si_set_uvd_dcm(rdev, false);
	} else {
		u32 tmp = RREG32(UVD_CGC_CTRL);
		tmp &= ~DCM;
		WREG32(UVD_CGC_CTRL, tmp);
	}
}
4953 
/* Stop the RLC if it is running and wait for its serdes to go idle.
 * Returns the original RLC_CNTL value so si_update_rlc() can restore
 * the previous state afterwards. */
static u32 si_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		si_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
4969 
4970 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4971 {
4972 	u32 tmp;
4973 
4974 	tmp = RREG32(RLC_CNTL);
4975 	if (tmp != rlc)
4976 		WREG32(RLC_CNTL, rlc);
4977 }
4978 
4979 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4980 {
4981 	u32 data, orig;
4982 
4983 	orig = data = RREG32(DMA_PG);
4984 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4985 		data |= PG_CNTL_ENABLE;
4986 	else
4987 		data &= ~PG_CNTL_ENABLE;
4988 	if (orig != data)
4989 		WREG32(DMA_PG, data);
4990 }
4991 
/* Program the DMA power-gating state machine.  The magic values are
 * the hardware-specified PGFSM init sequence — do not change them. */
static void si_init_dma_pg(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(DMA_PGFSM_WRITE,  0x00002000);
	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);

	/* five zero writes complete the PGFSM programming sequence */
	for (tmp = 0; tmp < 5; tmp++)
		WREG32(DMA_PGFSM_WRITE, 0);
}
5002 
/* Enable/disable GFX coarse-grain power gating.  Enable programs the
 * RLC power thresholds and turns on both GFX_PG and automatic PG;
 * disable clears automatic PG. */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* NOTE(review): read looks like a posting/flush read of a
		 * gfx register; result intentionally unused — confirm */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
5027 
/* One-time GFX CGPG setup: point the RLC at the save/restore and
 * clear-state buffers and program the auto-PG idle threshold. */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* GRBM sample interval 0x700; clear the PG-after-GRBM threshold */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
5047 
5048 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5049 {
5050 	u32 mask = 0, tmp, tmp1;
5051 	int i;
5052 
5053 	si_select_se_sh(rdev, se, sh);
5054 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5055 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5056 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5057 
5058 	tmp &= 0xffff0000;
5059 
5060 	tmp |= tmp1;
5061 	tmp >>= 16;
5062 
5063 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5064 		mask <<= 1;
5065 		mask |= 1;
5066 	}
5067 
5068 	return (~tmp) & mask;
5069 }
5070 
5071 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5072 {
5073 	u32 i, j, k, active_cu_number = 0;
5074 	u32 mask, counter, cu_bitmap;
5075 	u32 tmp = 0;
5076 
5077 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5078 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5079 			mask = 1;
5080 			cu_bitmap = 0;
5081 			counter  = 0;
5082 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5083 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5084 					if (counter < 2)
5085 						cu_bitmap |= mask;
5086 					counter++;
5087 				}
5088 				mask <<= 1;
5089 			}
5090 
5091 			active_cu_number += counter;
5092 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5093 		}
5094 	}
5095 
5096 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5097 
5098 	tmp = RREG32(RLC_MAX_PG_CU);
5099 	tmp &= ~MAX_PU_CU_MASK;
5100 	tmp |= MAX_PU_CU(active_cu_number);
5101 	WREG32(RLC_MAX_PG_CU, tmp);
5102 }
5103 
/* Enable/disable GFX coarse-grain clock gating (CGCG/CGLS).  The RLC
 * must be halted around the serdes writes; write order matters. */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* four posting reads before clearing the enables */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5143 
/* Enable/disable GFX medium-grain clock gating (and CP memory light
 * sleep when supported).  Both paths halt the RLC around the serdes
 * writes and restore it afterwards; write order matters. */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the MGCG override bits (low 6 bits) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* set the override bits to force clocks on */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5199 
/* Enable/disable UVD medium-grain clock gating: toggles the UVD memory
 * gating bits, the DCM bit, and the SMC-side CGTT local overrides. */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, tmp;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp |= 0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
	} else {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp &= ~0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
	}
}
5231 
/* MC/ATC/VM clock-gating registers toggled as a group by
 * si_enable_mc_ls() (MC_LS_ENABLE) and si_enable_mc_mgcg()
 * (MC_CG_ENABLE). */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5244 
5245 static void si_enable_mc_ls(struct radeon_device *rdev,
5246 			    bool enable)
5247 {
5248 	int i;
5249 	u32 orig, data;
5250 
5251 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5252 		orig = data = RREG32(mc_cg_registers[i]);
5253 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5254 			data |= MC_LS_ENABLE;
5255 		else
5256 			data &= ~MC_LS_ENABLE;
5257 		if (data != orig)
5258 			WREG32(mc_cg_registers[i], data);
5259 	}
5260 }
5261 
5262 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5263 			       bool enable)
5264 {
5265 	int i;
5266 	u32 orig, data;
5267 
5268 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5269 		orig = data = RREG32(mc_cg_registers[i]);
5270 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5271 			data |= MC_CG_ENABLE;
5272 		else
5273 			data &= ~MC_CG_ENABLE;
5274 		if (data != orig)
5275 			WREG32(mc_cg_registers[i], data);
5276 	}
5277 }
5278 
/* Enable/disable medium-grain clock gating on both DMA engines.  Note
 * the asymmetry: enable writes DMA_CLK_CTRL unconditionally, disable
 * does a read-modify-write of it. */
static void si_enable_dma_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, offset;
	int i;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
		for (i = 0; i < 2; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			orig = data = RREG32(DMA_POWER_CNTL + offset);
			data &= ~MEM_POWER_OVERRIDE;
			if (data != orig)
				WREG32(DMA_POWER_CNTL + offset, data);
			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
		}
	} else {
		for (i = 0; i < 2; i++) {
			if (i == 0)
				offset = DMA0_REGISTER_OFFSET;
			else
				offset = DMA1_REGISTER_OFFSET;
			orig = data = RREG32(DMA_POWER_CNTL + offset);
			data |= MEM_POWER_OVERRIDE;
			if (data != orig)
				WREG32(DMA_POWER_CNTL + offset, data);

			orig = data = RREG32(DMA_CLK_CTRL + offset);
			data = 0xff000000;
			if (data != orig)
				WREG32(DMA_CLK_CTRL + offset, data);
		}
	}
}
5315 
5316 static void si_enable_bif_mgls(struct radeon_device *rdev,
5317 			       bool enable)
5318 {
5319 	u32 orig, data;
5320 
5321 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5322 
5323 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5324 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5325 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5326 	else
5327 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5328 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5329 
5330 	if (orig != data)
5331 		WREG32_PCIE(PCIE_CNTL2, data);
5332 }
5333 
5334 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5335 			       bool enable)
5336 {
5337 	u32 orig, data;
5338 
5339 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5340 
5341 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5342 		data &= ~CLOCK_GATING_DIS;
5343 	else
5344 		data |= CLOCK_GATING_DIS;
5345 
5346 	if (orig != data)
5347 		WREG32(HDP_HOST_PATH_CNTL, data);
5348 }
5349 
5350 static void si_enable_hdp_ls(struct radeon_device *rdev,
5351 			     bool enable)
5352 {
5353 	u32 orig, data;
5354 
5355 	orig = data = RREG32(HDP_MEM_POWER_LS);
5356 
5357 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5358 		data |= HDP_LS_ENABLE;
5359 	else
5360 		data &= ~HDP_LS_ENABLE;
5361 
5362 	if (orig != data)
5363 		WREG32(HDP_MEM_POWER_LS, data);
5364 }
5365 
/* Enable/disable clock gating for the blocks selected by the
 * RADEON_CG_BLOCK_* bitmask in 'block'.  UVD is only touched when the
 * ASIC has a UVD engine. */
static void si_update_cg(struct radeon_device *rdev,
			 u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		si_enable_gui_idle_interrupt(rdev, false);
		/* order matters! MGCG before CGCG on enable, reverse on disable */
		if (enable) {
			si_enable_mgcg(rdev, true);
			si_enable_cgcg(rdev, true);
		} else {
			si_enable_cgcg(rdev, false);
			si_enable_mgcg(rdev, false);
		}
		si_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		si_enable_mc_mgcg(rdev, enable);
		si_enable_mc_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		si_enable_dma_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		si_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd) {
			si_enable_uvd_mgcg(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		si_enable_hdp_mgcg(rdev, enable);
		si_enable_hdp_ls(rdev, enable);
	}
}
5406 
/* Turn on clock gating for all supported blocks; UVD gets its internal
 * clock gating initialized as well when present. */
static void si_init_cg(struct radeon_device *rdev)
{
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), true);
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
		si_init_uvd_internal_cg(rdev);
	}
}
5419 
/* Turn clock gating back off, in the reverse order of si_init_cg():
 * UVD first, then the remaining blocks. */
static void si_fini_cg(struct radeon_device *rdev)
{
	if (rdev->has_uvd) {
		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
	}
	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
			    RADEON_CG_BLOCK_MC |
			    RADEON_CG_BLOCK_SDMA |
			    RADEON_CG_BLOCK_BIF |
			    RADEON_CG_BLOCK_HDP), false);
}
5431 
5432 u32 si_get_csb_size(struct radeon_device *rdev)
5433 {
5434 	u32 count = 0;
5435 	const struct cs_section_def *sect = NULL;
5436 	const struct cs_extent_def *ext = NULL;
5437 
5438 	if (rdev->rlc.cs_data == NULL)
5439 		return 0;
5440 
5441 	/* begin clear state */
5442 	count += 2;
5443 	/* context control state */
5444 	count += 3;
5445 
5446 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5447 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5448 			if (sect->id == SECT_CONTEXT)
5449 				count += 2 + ext->reg_count;
5450 			else
5451 				return 0;
5452 		}
5453 	}
5454 	/* pa_sc_raster_config */
5455 	count += 3;
5456 	/* end clear state */
5457 	count += 2;
5458 	/* clear state */
5459 	count += 2;
5460 
5461 	return count;
5462 }
5463 
/* Fill 'buffer' with the clear-state PM4 stream whose length is given
 * by si_get_csb_size(): preamble begin, context control, the context
 * register extents, a per-family PA_SC_RASTER_CONFIG value, preamble
 * end, and a CLEAR_STATE packet.  Values are emitted little-endian. */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* non-context sections are unsupported; bail out */
				return;
			}
		}
	}

	/* per-family raster configuration */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = cpu_to_le32(0x2a00126a);
		break;
	case CHIP_VERDE:
		buffer[count++] = cpu_to_le32(0x0000124a);
		break;
	case CHIP_OLAND:
		buffer[count++] = cpu_to_le32(0x00000082);
		break;
	case CHIP_HAINAN:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
5523 
/* Initialize power gating.  Without any pg_flags only the RLC
 * save/restore and clear-state buffer addresses are programmed;
 * otherwise the supported PG features are set up and enabled. */
static void si_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
			si_init_dma_pg(rdev);
		}
		si_init_ao_cu_mask(rdev);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			si_init_gfx_cgpg(rdev);
		} else {
			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
		}
		/* the enable helpers check the individual pg_flags themselves */
		si_enable_dma_pg(rdev, true);
		si_enable_gfx_cgpg(rdev, true);
	} else {
		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
	}
}
5544 
/* Disable power gating if it was enabled by si_init_pg(). */
static void si_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		si_enable_dma_pg(rdev, false);
		si_enable_gfx_cgpg(rdev, false);
	}
}
5552 
5553 /*
5554  * RLC
5555  */
/* Pulse the RLC soft-reset bit in GRBM_SOFT_RESET, with a 50us settle
 * delay after assert and after deassert. */
void si_rlc_reset(struct radeon_device *rdev)
{
	u32 tmp = RREG32(GRBM_SOFT_RESET);

	tmp |= SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
	tmp &= ~SOFT_RESET_RLC;
	WREG32(GRBM_SOFT_RESET, tmp);
	udelay(50);
}
5567 
/* Stop the RLC, mask gui-idle interrupts, and wait for the RLC serdes
 * to drain. */
static void si_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	si_enable_gui_idle_interrupt(rdev, false);

	si_wait_for_rlc_serdes(rdev);
}
5576 
/* Start the RLC, unmask gui-idle interrupts, and give it 50us to come
 * up. */
static void si_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	si_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}
5585 
5586 static bool si_lbpw_supported(struct radeon_device *rdev)
5587 {
5588 	u32 tmp;
5589 
5590 	/* Enable LBPW only for DDR3 */
5591 	tmp = RREG32(MC_SEQ_MISC0);
5592 	if ((tmp & 0xF0000000) == 0xB0000000)
5593 		return true;
5594 	return false;
5595 }
5596 
/* Enable/disable load-balanced power walking.  When disabling, also
 * reset the SPI load-balancing CU mask across all SEs/SHs. */
static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
{
	u32 tmp;

	tmp = RREG32(RLC_LB_CNTL);
	if (enable)
		tmp |= LOAD_BALANCE_ENABLE;
	else
		tmp &= ~LOAD_BALANCE_ENABLE;
	WREG32(RLC_LB_CNTL, tmp);

	if (!enable) {
		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(SPI_LB_CU_MASK, 0x00ff);
	}
}
5613 
/* Bring up the RLC: stop and reset it, initialize PG/CG, clear the
 * load-balancing state, upload the RLC microcode (big-endian words in
 * the firmware image), and start it.  Returns -EINVAL when no RLC
 * firmware has been loaded. */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* upload the microcode word by word via the ADDR/DATA pair */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5653 
5654 static void si_enable_interrupts(struct radeon_device *rdev)
5655 {
5656 	u32 ih_cntl = RREG32(IH_CNTL);
5657 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5658 
5659 	ih_cntl |= ENABLE_INTR;
5660 	ih_rb_cntl |= IH_RB_ENABLE;
5661 	WREG32(IH_CNTL, ih_cntl);
5662 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5663 	rdev->ih.enabled = true;
5664 }
5665 
5666 static void si_disable_interrupts(struct radeon_device *rdev)
5667 {
5668 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5669 	u32 ih_cntl = RREG32(IH_CNTL);
5670 
5671 	ih_rb_cntl &= ~IH_RB_ENABLE;
5672 	ih_cntl &= ~ENABLE_INTR;
5673 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5674 	WREG32(IH_CNTL, ih_cntl);
5675 	/* set rptr, wptr to 0 */
5676 	WREG32(IH_RB_RPTR, 0);
5677 	WREG32(IH_RB_WPTR, 0);
5678 	rdev->ih.enabled = false;
5679 	rdev->ih.rptr = 0;
5680 }
5681 
/* Mask every interrupt source: CP rings, DMA engines, GRBM, per-CRTC
 * vblank and pageflip sources, and (on ASICs with display) DAC and HPD
 * pins.  HPD polarity bits are preserved while clearing the enables. */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5739 
/* One-time interrupt handler (IH) bring-up: allocate the IH ring,
 * resume the RLC, program the IH ring base/size/writeback, then enable
 * interrupts with every source masked (si_disable_interrupt_state()).
 * Returns 0 on success or a negative errno; the IH ring is freed again
 * if the RLC fails to resume. */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_enable_busmaster(rdev->dev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5810 
/**
 * si_irq_set - program the SI interrupt enable state
 *
 * @rdev: radeon_device pointer
 *
 * Builds enable masks for the CP rings, the two DMA engines, CRTC
 * vblank/pageflip sources, HPD pins and the thermal interrupt from the
 * software state tracked in rdev->irq, then writes them all to the
 * hardware.  Returns 0 on success, -EINVAL if no irq handler is
 * installed.
 */
int si_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int = 0;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		si_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		si_disable_interrupt_state(rdev);
		return 0;
	}

	/* keep only the context busy/empty enables; everything else starts
	 * cleared and is OR'd back in below based on rdev->irq state */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);

	if (!ASIC_IS_NODCE(rdev)) {
		/* start from the current HPD control values with the enable
		 * bit cleared so unused pins keep their other settings */
		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
	}

	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	thermal_int = RREG32(CG_THERMAL_INT) &
		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("si_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}
	/* vblank is enabled either for an explicit vblank client or for a
	 * pending pageflip on the crtc */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("si_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("si_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("si_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("si_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("si_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("si_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("si_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("si_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("si_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("si_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("si_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("si_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);

	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}

	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	/* pageflip interrupts are always unmasked; they are filtered in
	 * software by the handlers in si_irq_process() */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DC_HPD1_INT_CONTROL, hpd1);
		WREG32(DC_HPD2_INT_CONTROL, hpd2);
		WREG32(DC_HPD3_INT_CONTROL, hpd3);
		WREG32(DC_HPD4_INT_CONTROL, hpd4);
		WREG32(DC_HPD5_INT_CONTROL, hpd5);
		WREG32(DC_HPD6_INT_CONTROL, hpd6);
	}

	WREG32(CG_THERMAL_INT, thermal_int);

	return 0;
}
5987 
5988 static inline void si_irq_ack(struct radeon_device *rdev)
5989 {
5990 	u32 tmp;
5991 
5992 	if (ASIC_IS_NODCE(rdev))
5993 		return;
5994 
5995 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5996 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5997 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5998 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5999 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6000 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6001 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6002 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6003 	if (rdev->num_crtc >= 4) {
6004 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6005 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6006 	}
6007 	if (rdev->num_crtc >= 6) {
6008 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6009 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6010 	}
6011 
6012 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6013 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6014 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6015 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6016 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6017 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6018 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6019 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6020 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6021 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6022 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6023 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6024 
6025 	if (rdev->num_crtc >= 4) {
6026 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6027 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6028 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6029 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6030 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6031 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6032 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6033 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6034 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6035 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6036 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6037 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6038 	}
6039 
6040 	if (rdev->num_crtc >= 6) {
6041 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6042 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6043 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6044 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6045 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6046 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6047 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6048 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6049 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6050 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6051 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6052 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6053 	}
6054 
6055 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6056 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6057 		tmp |= DC_HPDx_INT_ACK;
6058 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6059 	}
6060 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6061 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6062 		tmp |= DC_HPDx_INT_ACK;
6063 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6064 	}
6065 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6066 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6067 		tmp |= DC_HPDx_INT_ACK;
6068 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6069 	}
6070 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6071 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6072 		tmp |= DC_HPDx_INT_ACK;
6073 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6074 	}
6075 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6076 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6077 		tmp |= DC_HPDx_INT_ACK;
6078 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6079 	}
6080 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6081 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6082 		tmp |= DC_HPDx_INT_ACK;
6083 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6084 	}
6085 }
6086 
/**
 * si_irq_disable - shut off the interrupt controller
 *
 * @rdev: radeon_device pointer
 *
 * Disables interrupt delivery, waits briefly for in-flight interrupts,
 * acks anything still pending, then clears the per-source enable state.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
6095 
/**
 * si_irq_suspend - quiesce interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disables interrupts and stops the RLC; the IH ring itself is kept
 * allocated (freed separately by si_irq_fini()).
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6101 
/**
 * si_irq_fini - tear down the interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Suspends interrupt processing and frees the IH ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6107 
6108 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6109 {
6110 	u32 wptr, tmp;
6111 
6112 	if (rdev->wb.enabled)
6113 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6114 	else
6115 		wptr = RREG32(IH_RB_WPTR);
6116 
6117 	if (wptr & RB_OVERFLOW) {
6118 		wptr &= ~RB_OVERFLOW;
6119 		/* When a ring buffer overflow happen start parsing interrupt
6120 		 * from the last not overwritten vector (wptr + 16). Hopefully
6121 		 * this should allow us to catchup.
6122 		 */
6123 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6124 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6125 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6126 		tmp = RREG32(IH_RB_CNTL);
6127 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6128 		WREG32(IH_RB_CNTL, tmp);
6129 	}
6130 	return (wptr & rdev->ih.ptr_mask);
6131 }
6132 
6133 /*        SI IV Ring
6134  * Each IV ring entry is 128 bits:
6135  * [7:0]    - interrupt source id
6136  * [31:8]   - reserved
6137  * [59:32]  - interrupt source data
6138  * [63:60]  - reserved
6139  * [71:64]  - RINGID
6140  * [79:72]  - VMID
6141  * [127:80] - reserved
6142  */
/**
 * si_irq_process - drain and dispatch the IH ring
 *
 * @rdev: radeon_device pointer
 *
 * Walks the interrupt handler ring from rptr to wptr, decoding each
 * 128-bit vector (see the IV ring layout comment above) and dispatching
 * to the matching handler: CRTC vblank/vline, pageflips, HPD hotplug,
 * UVD, VM protection faults, CP/DMA fence completion and thermal
 * events.  Hotplug and thermal work is deferred to taskqueues.  Returns
 * IRQ_NONE if the IH is disabled or another thread already owns the
 * lock, IRQ_HANDLED otherwise.
 */
irqreturn_t si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				/* only handle the interrupt if si_irq_ack() latched it
				 * as pending; the status bit is cleared so a repeated
				 * vector in the same pass is ignored */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_vblank(rdev, 0);
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_vblank(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_vblank(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_vblank(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_vblank(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_vblank(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 8: /* D1 page flip */
		case 10: /* D2 page flip */
		case 12: /* D3 page flip */
		case 14: /* D4 page flip */
		case 16: /* D5 page flip */
		case 18: /* D6 page flip */
			/* even src_ids 8..18 map linearly to crtc 0..5 */
			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
			if (radeon_use_pflipirq > 0)
				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 124: /* UVD */
			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
			break;
		case 146:
		case 147:
			/* VM protection fault: report the faulting address and
			 * status, then clear them in the hardware */
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			if (addr == 0x0 && status == 0x0)
				break;
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
		WREG32(IH_RB_RPTR, rptr);
	}
	/* deferred work is run from taskqueues, not interrupt context */
	if (queue_hotplug)
		taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		taskqueue_enqueue(rdev->tq, &rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6482 
6483 /*
6484  * startup/shutdown callbacks
6485  */
/**
 * si_startup - bring up the hardware blocks needed for acceleration
 *
 * @rdev: radeon_device pointer
 *
 * Programs the MC, GART and GPU core, allocates the RLC/write-back
 * buffers, initializes the fence driver and interrupt handler, starts
 * the CP/DMA/UVD rings and finally brings up the IB pool, VM manager
 * and audio.  Called at init and resume time.  Returns 0 on success,
 * negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* with dpm enabled, the MC microcode is loaded via dpm instead */
	if (!rdev->pm.dpm_enabled) {
		r = si_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start the fence driver on every ring we will use */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failures are not fatal: the ring is simply disabled */
	if (rdev->has_uvd) {
		r = uvd_v2_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
					     RADEON_CP_PACKET2);
			if (!r)
				r = uvd_v1_0_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6662 
6663 int si_resume(struct radeon_device *rdev)
6664 {
6665 	int r;
6666 
6667 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6668 	 * posting will perform necessary task to bring back GPU into good
6669 	 * shape.
6670 	 */
6671 	/* post card */
6672 	atom_asic_init(rdev->mode_info.atom_context);
6673 
6674 	/* init golden registers */
6675 	si_init_golden_registers(rdev);
6676 
6677 	if (rdev->pm.pm_method == PM_METHOD_DPM)
6678 		radeon_pm_resume(rdev);
6679 
6680 	rdev->accel_working = true;
6681 	r = si_startup(rdev);
6682 	if (r) {
6683 		DRM_ERROR("si startup failed on resume\n");
6684 		rdev->accel_working = false;
6685 		return r;
6686 	}
6687 
6688 	return r;
6689 
6690 }
6691 
/**
 * si_suspend - quiesce the asic in preparation for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Stops power management, audio, the VM manager, the CP and DMA
 * engines and UVD, tears down powergating/clockgating, disables
 * interrupts, write-back and the GART.  Always returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	radeon_pm_suspend(rdev);
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* stop command submission engines */
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6710 
/* Plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call asic-specific functions.  This should
 * also allow us to remove a bunch of callback functions
 * like vram_info.
 */
6717 int si_init(struct radeon_device *rdev)
6718 {
6719 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6720 	int r;
6721 
6722 	/* Read BIOS */
6723 	if (!radeon_get_bios(rdev)) {
6724 		if (ASIC_IS_AVIVO(rdev))
6725 			return -EINVAL;
6726 	}
6727 	/* Must be an ATOMBIOS */
6728 	if (!rdev->is_atom_bios) {
6729 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6730 		return -EINVAL;
6731 	}
6732 	r = radeon_atombios_init(rdev);
6733 	if (r)
6734 		return r;
6735 
6736 	/* Post card if necessary */
6737 	if (!radeon_card_posted(rdev)) {
6738 		if (!rdev->bios) {
6739 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6740 			return -EINVAL;
6741 		}
6742 		DRM_INFO("GPU not posted. posting now...\n");
6743 		atom_asic_init(rdev->mode_info.atom_context);
6744 	}
6745 	/* init golden registers */
6746 	si_init_golden_registers(rdev);
6747 	/* Initialize scratch registers */
6748 	si_scratch_init(rdev);
6749 	/* Initialize surface registers */
6750 	radeon_surface_init(rdev);
6751 	/* Initialize clocks */
6752 	radeon_get_clock_info(rdev->ddev);
6753 
6754 	/* Fence driver */
6755 	r = radeon_fence_driver_init(rdev);
6756 	if (r)
6757 		return r;
6758 
6759 	/* initialize memory controller */
6760 	r = si_mc_init(rdev);
6761 	if (r)
6762 		return r;
6763 	/* Memory manager */
6764 	r = radeon_bo_init(rdev);
6765 	if (r)
6766 		return r;
6767 
6768 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6769 	    !rdev->rlc_fw || !rdev->mc_fw) {
6770 		r = si_init_microcode(rdev);
6771 		if (r) {
6772 			DRM_ERROR("Failed to load firmware!\n");
6773 			return r;
6774 		}
6775 	}
6776 
6777 	/* Initialize power management */
6778 	radeon_pm_init(rdev);
6779 
6780 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6781 	ring->ring_obj = NULL;
6782 	r600_ring_init(rdev, ring, 1024 * 1024);
6783 
6784 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6785 	ring->ring_obj = NULL;
6786 	r600_ring_init(rdev, ring, 1024 * 1024);
6787 
6788 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6789 	ring->ring_obj = NULL;
6790 	r600_ring_init(rdev, ring, 1024 * 1024);
6791 
6792 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6793 	ring->ring_obj = NULL;
6794 	r600_ring_init(rdev, ring, 64 * 1024);
6795 
6796 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6797 	ring->ring_obj = NULL;
6798 	r600_ring_init(rdev, ring, 64 * 1024);
6799 
6800 	if (rdev->has_uvd) {
6801 		r = radeon_uvd_init(rdev);
6802 		if (!r) {
6803 			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6804 			ring->ring_obj = NULL;
6805 			r600_ring_init(rdev, ring, 4096);
6806 		}
6807 	}
6808 
6809 	rdev->ih.ring_obj = NULL;
6810 	r600_ih_ring_init(rdev, 64 * 1024);
6811 
6812 	r = r600_pcie_gart_init(rdev);
6813 	if (r)
6814 		return r;
6815 
6816 	rdev->accel_working = true;
6817 	r = si_startup(rdev);
6818 	if (r) {
6819 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6820 		si_cp_fini(rdev);
6821 		cayman_dma_fini(rdev);
6822 		si_irq_fini(rdev);
6823 		sumo_rlc_fini(rdev);
6824 		radeon_wb_fini(rdev);
6825 		radeon_ib_pool_fini(rdev);
6826 		radeon_vm_manager_fini(rdev);
6827 		radeon_irq_kms_fini(rdev);
6828 		si_pcie_gart_fini(rdev);
6829 		rdev->accel_working = false;
6830 	}
6831 
6832 	/* Don't start up if the MC ucode is missing.
6833 	 * The default clocks and voltages before the MC ucode
6834 	 * is loaded are not suffient for advanced operations.
6835 	 */
6836 	if (!rdev->mc_fw) {
6837 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
6838 		return -EINVAL;
6839 	}
6840 
6841 	return 0;
6842 }
6843 
/* si_fini - final driver teardown, undoing si_init()/si_startup().
 *
 * Teardown is performed in roughly the reverse order of
 * initialization; keep the ordering in sync with si_init().
 */
void si_fini(struct radeon_device *rdev)
{
	radeon_pm_fini(rdev);
	/* stop and free the engines first */
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	si_fini_microcode(rdev);
	/* drop the cached BIOS copy and clear the stale pointer */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6871 
6872 /**
6873  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6874  *
6875  * @rdev: radeon_device pointer
6876  *
6877  * Fetches a GPU clock counter snapshot (SI).
6878  * Returns the 64 bit clock counter snapshot.
6879  */
6880 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6881 {
6882 	uint64_t clock;
6883 
6884 	spin_lock(&rdev->gpu_clock_mutex);
6885 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6886 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6887 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6888 	spin_unlock(&rdev->gpu_clock_mutex);
6889 	return clock;
6890 }
6891 
/* si_set_uvd_clocks - program the UPLL to supply the requested UVD clocks.
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock; if 0, the PLL is left bypassed and
 *        put to sleep
 * @dclk: requested UVD decode clock; if 0, same as above
 *
 * The register write sequence (bypass -> divider setup -> reset
 * release -> bypass exit -> source switch) is the required hardware
 * programming order and must not be reordered.
 * Returns 0 on success or a negative error code.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* compute feedback and post dividers for the requested clocks */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* select UPLL_SPARE_ISPARE9 depending on the feedback divider value */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6982 
/* si_pcie_gen3_enable - attempt to raise the PCIE link to gen2/gen3 speed.
 *
 * Silently returns for IGP or non-PCIE parts, when the user disabled
 * it (radeon.pcie_gen2=0), when the platform caps the link below gen2,
 * or when the target speed is already active.  For gen3 the link
 * equalization is redone first.  The sequence of config-space and
 * PCIE-port register accesses below is order-sensitive; do not
 * rearrange it.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* what link speeds does the platform support? */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* locate the PCIE capability on the upstream bridge and the GPU */
	bridge_pos = pci_get_pciecap_ptr(root->dev);
	if (!bridge_pos)
		return;

	gpu_pos = pci_get_pciecap_ptr(rdev->pdev->dev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save current LNKCTL so HAWD can be restored later */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to its detected maximum if possible */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* retry equalization up to 10 times */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed in LNKCTL2 (low 4 bits) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the speed-change initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
7139 
/* si_program_aspm - program PCIE Active State Power Management.
 *
 * Configures L0s/L1 inactivity timers, PLL power-down behavior in L1
 * and (when supported) the clock-request clocking.  The disable_*
 * locals act as policy switches; all are currently false, i.e. every
 * feature is enabled where the chip allows it.  Registers are only
 * written when the computed value actually differs (read-modify-write
 * with an orig/data compare).  No-op when radeon.aspm=0 or the part
 * is not PCIE.
 */
static void si_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the transmitted N_FTS value */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity settings; written below depending
	 * on whether L1 is enabled
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PHY PLLs to power down in L1/TXS2 */
			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

			/* clear the PLL ramp-up times on all but Oland/Hainan */
			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
				data &= ~PLL_RAMP_UP_TIME_0_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
				data &= ~PLL_RAMP_UP_TIME_1_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
				data &= ~PLL_RAMP_UP_TIME_2_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);

				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
				data &= ~PLL_RAMP_UP_TIME_3_MASK;
				if (orig != data)
					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
			}
			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);

			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
			data &= ~LS2_EXIT_TIME_MASK;
			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
				data |= LS2_EXIT_TIME(5);
			if (orig != data)
				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);

			if (!disable_clkreq) {
/* NOTE(review): zMN_TODO is never defined on this port, so
 * clk_req_support is always false here; the #ifdef branch is the
 * Linux clock-power-management probe kept for a future port.
 */
#ifdef zMN_TODO
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
#else
				clk_req_support = false;
#endif
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32(THM_CLK_CNTL, data);

				orig = data = RREG32(MISC_CLK_CNTL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32(MISC_CLK_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL, data);

				orig = data = RREG32(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32(MPLL_BYPASSCLK_SEL, data);

				orig = data = RREG32(SPLL_CNTL_MODE);
				data &= ~SPLL_REFCLK_SEL_MASK;
				if (orig != data)
					WREG32(SPLL_CNTL_MODE, data);
			}
		}
	} else {
		/* L1 disabled: write only the L0s/PMI settings built above */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* drop the L0s inactivity timer if N_FTS is saturated on a
		 * reversed link
		 */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
7347