xref: /dragonfly/sys/dev/drm/radeon/si.c (revision be09fc23)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  *
24  * $FreeBSD: head/sys/dev/drm2/radeon/si.c 254885 2013-08-25 19:37:15Z dumbbell $
25  */
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include <uapi_drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 #include "clearstate_si.h"
36 #include "radeon_ucode.h"
37 
38 
39 #define PCI_EXP_LNKCTL PCIER_LINKCTRL /* 16 */
40 #define PCI_EXP_LNKCTL2 48
41 #define PCI_EXP_LNKCTL_HAWD PCIEM_LNKCTL_HAWD /* 0x0200 */
42 #define PCI_EXP_DEVSTA PCIER_DEVSTS /* 10 */
43 #define PCI_EXP_DEVSTA_TRPND 0x0020
44 #define PCI_EXP_LNKCAP_CLKPM 0x00040000
45 
46 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
47 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
48 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
49 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
50 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
51 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
52 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
53 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
54 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
55 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
56 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
57 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
58 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
59 MODULE_FIRMWARE("radeon/VERDE_me.bin");
60 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
61 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
62 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
63 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
64 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
65 MODULE_FIRMWARE("radeon/OLAND_me.bin");
66 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
67 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
68 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
69 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
70 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
71 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
72 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
73 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
74 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
75 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
76 
77 static void si_pcie_gen3_enable(struct radeon_device *rdev);
78 static void si_program_aspm(struct radeon_device *rdev);
79 
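/*
 * Each list entry is a pair of words: the first packs a selector in the
 * high 16 bits with a register dword offset (byte offset >> 2) in the
 * low 16 bits, the second is the value saved/restored for that register.
 * The selectors (0x8000, 0x8040, 0x9c00, ...) are consumed by the RLC
 * ucode; calling them "selectors" is an inference from usage, not from
 * documentation.
 */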
80 static const u32 verde_rlc_save_restore_register_list[] =
81 {
82 	(0x8000 << 16) | (0x98f4 >> 2),
83 	0x00000000,
84 	(0x8040 << 16) | (0x98f4 >> 2),
85 	0x00000000,
86 	(0x8000 << 16) | (0xe80 >> 2),
87 	0x00000000,
88 	(0x8040 << 16) | (0xe80 >> 2),
89 	0x00000000,
90 	(0x8000 << 16) | (0x89bc >> 2),
91 	0x00000000,
92 	(0x8040 << 16) | (0x89bc >> 2),
93 	0x00000000,
94 	(0x8000 << 16) | (0x8c1c >> 2),
95 	0x00000000,
96 	(0x8040 << 16) | (0x8c1c >> 2),
97 	0x00000000,
98 	(0x9c00 << 16) | (0x98f0 >> 2),
99 	0x00000000,
100 	(0x9c00 << 16) | (0xe7c >> 2),
101 	0x00000000,
102 	(0x8000 << 16) | (0x9148 >> 2),
103 	0x00000000,
104 	(0x8040 << 16) | (0x9148 >> 2),
105 	0x00000000,
106 	(0x9c00 << 16) | (0x9150 >> 2),
107 	0x00000000,
108 	(0x9c00 << 16) | (0x897c >> 2),
109 	0x00000000,
110 	(0x9c00 << 16) | (0x8d8c >> 2),
111 	0x00000000,
112 	(0x9c00 << 16) | (0xac54 >> 2),
113 	0x00000000,
114 	0x3,
115 	(0x9c00 << 16) | (0x98f8 >> 2),
116 	0x00000000,
117 	(0x9c00 << 16) | (0x9910 >> 2),
118 	0x00000000,
119 	(0x9c00 << 16) | (0x9914 >> 2),
120 	0x00000000,
121 	(0x9c00 << 16) | (0x9918 >> 2),
122 	0x00000000,
123 	(0x9c00 << 16) | (0x991c >> 2),
124 	0x00000000,
125 	(0x9c00 << 16) | (0x9920 >> 2),
126 	0x00000000,
127 	(0x9c00 << 16) | (0x9924 >> 2),
128 	0x00000000,
129 	(0x9c00 << 16) | (0x9928 >> 2),
130 	0x00000000,
131 	(0x9c00 << 16) | (0x992c >> 2),
132 	0x00000000,
133 	(0x9c00 << 16) | (0x9930 >> 2),
134 	0x00000000,
135 	(0x9c00 << 16) | (0x9934 >> 2),
136 	0x00000000,
137 	(0x9c00 << 16) | (0x9938 >> 2),
138 	0x00000000,
139 	(0x9c00 << 16) | (0x993c >> 2),
140 	0x00000000,
141 	(0x9c00 << 16) | (0x9940 >> 2),
142 	0x00000000,
143 	(0x9c00 << 16) | (0x9944 >> 2),
144 	0x00000000,
145 	(0x9c00 << 16) | (0x9948 >> 2),
146 	0x00000000,
147 	(0x9c00 << 16) | (0x994c >> 2),
148 	0x00000000,
149 	(0x9c00 << 16) | (0x9950 >> 2),
150 	0x00000000,
151 	(0x9c00 << 16) | (0x9954 >> 2),
152 	0x00000000,
153 	(0x9c00 << 16) | (0x9958 >> 2),
154 	0x00000000,
155 	(0x9c00 << 16) | (0x995c >> 2),
156 	0x00000000,
157 	(0x9c00 << 16) | (0x9960 >> 2),
158 	0x00000000,
159 	(0x9c00 << 16) | (0x9964 >> 2),
160 	0x00000000,
161 	(0x9c00 << 16) | (0x9968 >> 2),
162 	0x00000000,
163 	(0x9c00 << 16) | (0x996c >> 2),
164 	0x00000000,
165 	(0x9c00 << 16) | (0x9970 >> 2),
166 	0x00000000,
167 	(0x9c00 << 16) | (0x9974 >> 2),
168 	0x00000000,
169 	(0x9c00 << 16) | (0x9978 >> 2),
170 	0x00000000,
171 	(0x9c00 << 16) | (0x997c >> 2),
172 	0x00000000,
173 	(0x9c00 << 16) | (0x9980 >> 2),
174 	0x00000000,
175 	(0x9c00 << 16) | (0x9984 >> 2),
176 	0x00000000,
177 	(0x9c00 << 16) | (0x9988 >> 2),
178 	0x00000000,
179 	(0x9c00 << 16) | (0x998c >> 2),
180 	0x00000000,
181 	(0x9c00 << 16) | (0x8c00 >> 2),
182 	0x00000000,
183 	(0x9c00 << 16) | (0x8c14 >> 2),
184 	0x00000000,
185 	(0x9c00 << 16) | (0x8c04 >> 2),
186 	0x00000000,
187 	(0x9c00 << 16) | (0x8c08 >> 2),
188 	0x00000000,
189 	(0x8000 << 16) | (0x9b7c >> 2),
190 	0x00000000,
191 	(0x8040 << 16) | (0x9b7c >> 2),
192 	0x00000000,
193 	(0x8000 << 16) | (0xe84 >> 2),
194 	0x00000000,
195 	(0x8040 << 16) | (0xe84 >> 2),
196 	0x00000000,
197 	(0x8000 << 16) | (0x89c0 >> 2),
198 	0x00000000,
199 	(0x8040 << 16) | (0x89c0 >> 2),
200 	0x00000000,
201 	(0x8000 << 16) | (0x914c >> 2),
202 	0x00000000,
203 	(0x8040 << 16) | (0x914c >> 2),
204 	0x00000000,
205 	(0x8000 << 16) | (0x8c20 >> 2),
206 	0x00000000,
207 	(0x8040 << 16) | (0x8c20 >> 2),
208 	0x00000000,
209 	(0x8000 << 16) | (0x9354 >> 2),
210 	0x00000000,
211 	(0x8040 << 16) | (0x9354 >> 2),
212 	0x00000000,
213 	(0x9c00 << 16) | (0x9060 >> 2),
214 	0x00000000,
215 	(0x9c00 << 16) | (0x9364 >> 2),
216 	0x00000000,
217 	(0x9c00 << 16) | (0x9100 >> 2),
218 	0x00000000,
219 	(0x9c00 << 16) | (0x913c >> 2),
220 	0x00000000,
221 	(0x8000 << 16) | (0x90e0 >> 2),
222 	0x00000000,
223 	(0x8000 << 16) | (0x90e4 >> 2),
224 	0x00000000,
225 	(0x8000 << 16) | (0x90e8 >> 2),
226 	0x00000000,
227 	(0x8040 << 16) | (0x90e0 >> 2),
228 	0x00000000,
229 	(0x8040 << 16) | (0x90e4 >> 2),
230 	0x00000000,
231 	(0x8040 << 16) | (0x90e8 >> 2),
232 	0x00000000,
233 	(0x9c00 << 16) | (0x8bcc >> 2),
234 	0x00000000,
235 	(0x9c00 << 16) | (0x8b24 >> 2),
236 	0x00000000,
237 	(0x9c00 << 16) | (0x88c4 >> 2),
238 	0x00000000,
239 	(0x9c00 << 16) | (0x8e50 >> 2),
240 	0x00000000,
241 	(0x9c00 << 16) | (0x8c0c >> 2),
242 	0x00000000,
243 	(0x9c00 << 16) | (0x8e58 >> 2),
244 	0x00000000,
245 	(0x9c00 << 16) | (0x8e5c >> 2),
246 	0x00000000,
247 	(0x9c00 << 16) | (0x9508 >> 2),
248 	0x00000000,
249 	(0x9c00 << 16) | (0x950c >> 2),
250 	0x00000000,
251 	(0x9c00 << 16) | (0x9494 >> 2),
252 	0x00000000,
253 	(0x9c00 << 16) | (0xac0c >> 2),
254 	0x00000000,
255 	(0x9c00 << 16) | (0xac10 >> 2),
256 	0x00000000,
257 	(0x9c00 << 16) | (0xac14 >> 2),
258 	0x00000000,
259 	(0x9c00 << 16) | (0xae00 >> 2),
260 	0x00000000,
261 	(0x9c00 << 16) | (0xac08 >> 2),
262 	0x00000000,
263 	(0x9c00 << 16) | (0x88d4 >> 2),
264 	0x00000000,
265 	(0x9c00 << 16) | (0x88c8 >> 2),
266 	0x00000000,
267 	(0x9c00 << 16) | (0x88cc >> 2),
268 	0x00000000,
269 	(0x9c00 << 16) | (0x89b0 >> 2),
270 	0x00000000,
271 	(0x9c00 << 16) | (0x8b10 >> 2),
272 	0x00000000,
273 	(0x9c00 << 16) | (0x8a14 >> 2),
274 	0x00000000,
275 	(0x9c00 << 16) | (0x9830 >> 2),
276 	0x00000000,
277 	(0x9c00 << 16) | (0x9834 >> 2),
278 	0x00000000,
279 	(0x9c00 << 16) | (0x9838 >> 2),
280 	0x00000000,
281 	(0x9c00 << 16) | (0x9a10 >> 2),
282 	0x00000000,
283 	(0x8000 << 16) | (0x9870 >> 2),
284 	0x00000000,
285 	(0x8000 << 16) | (0x9874 >> 2),
286 	0x00000000,
287 	(0x8001 << 16) | (0x9870 >> 2),
288 	0x00000000,
289 	(0x8001 << 16) | (0x9874 >> 2),
290 	0x00000000,
291 	(0x8040 << 16) | (0x9870 >> 2),
292 	0x00000000,
293 	(0x8040 << 16) | (0x9874 >> 2),
294 	0x00000000,
295 	(0x8041 << 16) | (0x9870 >> 2),
296 	0x00000000,
297 	(0x8041 << 16) | (0x9874 >> 2),
298 	0x00000000,
299 	0x00000000
300 };
301 
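/*
 * The golden-register and mgcg/cgcg tables below are triples of
 * { register offset, and_mask, or_mask } as consumed by
 * radeon_program_register_sequence(): an and_mask of 0xffffffff writes
 * or_mask directly, anything else does a read-modify-write of
 * (reg & ~and_mask) | or_mask.
 */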
302 static const u32 tahiti_golden_rlc_registers[] =
303 {
304 	0xc424, 0xffffffff, 0x00601005,
305 	0xc47c, 0xffffffff, 0x10104040,
306 	0xc488, 0xffffffff, 0x0100000a,
307 	0xc314, 0xffffffff, 0x00000800,
308 	0xc30c, 0xffffffff, 0x800000f4,
309 	0xf4a8, 0xffffffff, 0x00000000
310 };
311 
312 static const u32 tahiti_golden_registers[] =
313 {
314 	0x9a10, 0x00010000, 0x00018208,
315 	0x9830, 0xffffffff, 0x00000000,
316 	0x9834, 0xf00fffff, 0x00000400,
317 	0x9838, 0x0002021c, 0x00020200,
318 	0xc78, 0x00000080, 0x00000000,
319 	0xd030, 0x000300c0, 0x00800040,
320 	0xd830, 0x000300c0, 0x00800040,
321 	0x5bb0, 0x000000f0, 0x00000070,
322 	0x5bc0, 0x00200000, 0x50100000,
323 	0x7030, 0x31000311, 0x00000011,
324 	0x277c, 0x00000003, 0x000007ff,
325 	0x240c, 0x000007ff, 0x00000000,
326 	0x8a14, 0xf000001f, 0x00000007,
327 	0x8b24, 0xffffffff, 0x00ffffff,
328 	0x8b10, 0x0000ff0f, 0x00000000,
329 	0x28a4c, 0x07ffffff, 0x4e000000,
330 	0x28350, 0x3f3f3fff, 0x2a00126a,
331 	0x30, 0x000000ff, 0x0040,
332 	0x34, 0x00000040, 0x00004040,
333 	0x9100, 0x07ffffff, 0x03000000,
334 	0x8e88, 0x01ff1f3f, 0x00000000,
335 	0x8e84, 0x01ff1f3f, 0x00000000,
336 	0x9060, 0x0000007f, 0x00000020,
337 	0x9508, 0x00010000, 0x00010000,
338 	0xac14, 0x00000200, 0x000002fb,
339 	0xac10, 0xffffffff, 0x0000543b,
340 	0xac0c, 0xffffffff, 0xa9210876,
341 	0x88d0, 0xffffffff, 0x000fff40,
342 	0x88d4, 0x0000001f, 0x00000010,
343 	0x1410, 0x20000000, 0x20fffed8,
344 	0x15c0, 0x000c0fc0, 0x000c0400
345 };
346 
347 static const u32 tahiti_golden_registers2[] =
348 {
349 	0xc64, 0x00000001, 0x00000001
350 };
351 
352 static const u32 pitcairn_golden_rlc_registers[] =
353 {
354 	0xc424, 0xffffffff, 0x00601004,
355 	0xc47c, 0xffffffff, 0x10102020,
356 	0xc488, 0xffffffff, 0x01000020,
357 	0xc314, 0xffffffff, 0x00000800,
358 	0xc30c, 0xffffffff, 0x800000a4
359 };
360 
361 static const u32 pitcairn_golden_registers[] =
362 {
363 	0x9a10, 0x00010000, 0x00018208,
364 	0x9830, 0xffffffff, 0x00000000,
365 	0x9834, 0xf00fffff, 0x00000400,
366 	0x9838, 0x0002021c, 0x00020200,
367 	0xc78, 0x00000080, 0x00000000,
368 	0xd030, 0x000300c0, 0x00800040,
369 	0xd830, 0x000300c0, 0x00800040,
370 	0x5bb0, 0x000000f0, 0x00000070,
371 	0x5bc0, 0x00200000, 0x50100000,
372 	0x7030, 0x31000311, 0x00000011,
373 	0x2ae4, 0x00073ffe, 0x000022a2,
374 	0x240c, 0x000007ff, 0x00000000,
375 	0x8a14, 0xf000001f, 0x00000007,
376 	0x8b24, 0xffffffff, 0x00ffffff,
377 	0x8b10, 0x0000ff0f, 0x00000000,
378 	0x28a4c, 0x07ffffff, 0x4e000000,
379 	0x28350, 0x3f3f3fff, 0x2a00126a,
380 	0x30, 0x000000ff, 0x0040,
381 	0x34, 0x00000040, 0x00004040,
382 	0x9100, 0x07ffffff, 0x03000000,
383 	0x9060, 0x0000007f, 0x00000020,
384 	0x9508, 0x00010000, 0x00010000,
385 	0xac14, 0x000003ff, 0x000000f7,
386 	0xac10, 0xffffffff, 0x00000000,
387 	0xac0c, 0xffffffff, 0x32761054,
388 	0x88d4, 0x0000001f, 0x00000010,
389 	0x15c0, 0x000c0fc0, 0x000c0400
390 };
391 
392 static const u32 verde_golden_rlc_registers[] =
393 {
394 	0xc424, 0xffffffff, 0x033f1005,
395 	0xc47c, 0xffffffff, 0x10808020,
396 	0xc488, 0xffffffff, 0x00800008,
397 	0xc314, 0xffffffff, 0x00001000,
398 	0xc30c, 0xffffffff, 0x80010014
399 };
400 
401 static const u32 verde_golden_registers[] =
402 {
403 	0x9a10, 0x00010000, 0x00018208,
404 	0x9830, 0xffffffff, 0x00000000,
405 	0x9834, 0xf00fffff, 0x00000400,
406 	0x9838, 0x0002021c, 0x00020200,
407 	0xc78, 0x00000080, 0x00000000,
408 	0xd030, 0x000300c0, 0x00800040,
409 	0xd030, 0x000300c0, 0x00800040,
410 	0xd830, 0x000300c0, 0x00800040,
411 	0xd830, 0x000300c0, 0x00800040,
412 	0x5bb0, 0x000000f0, 0x00000070,
413 	0x5bc0, 0x00200000, 0x50100000,
414 	0x7030, 0x31000311, 0x00000011,
415 	0x2ae4, 0x00073ffe, 0x000022a2,
416 	0x2ae4, 0x00073ffe, 0x000022a2,
417 	0x2ae4, 0x00073ffe, 0x000022a2,
418 	0x240c, 0x000007ff, 0x00000000,
419 	0x240c, 0x000007ff, 0x00000000,
420 	0x240c, 0x000007ff, 0x00000000,
421 	0x8a14, 0xf000001f, 0x00000007,
422 	0x8a14, 0xf000001f, 0x00000007,
423 	0x8a14, 0xf000001f, 0x00000007,
424 	0x8b24, 0xffffffff, 0x00ffffff,
425 	0x8b10, 0x0000ff0f, 0x00000000,
426 	0x28a4c, 0x07ffffff, 0x4e000000,
427 	0x28350, 0x3f3f3fff, 0x0000124a,
428 	0x28350, 0x3f3f3fff, 0x0000124a,
429 	0x28350, 0x3f3f3fff, 0x0000124a,
430 	0x30, 0x000000ff, 0x0040,
431 	0x34, 0x00000040, 0x00004040,
432 	0x9100, 0x07ffffff, 0x03000000,
433 	0x9100, 0x07ffffff, 0x03000000,
434 	0x8e88, 0x01ff1f3f, 0x00000000,
435 	0x8e88, 0x01ff1f3f, 0x00000000,
436 	0x8e88, 0x01ff1f3f, 0x00000000,
437 	0x8e84, 0x01ff1f3f, 0x00000000,
438 	0x8e84, 0x01ff1f3f, 0x00000000,
439 	0x8e84, 0x01ff1f3f, 0x00000000,
440 	0x9060, 0x0000007f, 0x00000020,
441 	0x9508, 0x00010000, 0x00010000,
442 	0xac14, 0x000003ff, 0x00000003,
443 	0xac14, 0x000003ff, 0x00000003,
444 	0xac14, 0x000003ff, 0x00000003,
445 	0xac10, 0xffffffff, 0x00000000,
446 	0xac10, 0xffffffff, 0x00000000,
447 	0xac10, 0xffffffff, 0x00000000,
448 	0xac0c, 0xffffffff, 0x00001032,
449 	0xac0c, 0xffffffff, 0x00001032,
450 	0xac0c, 0xffffffff, 0x00001032,
451 	0x88d4, 0x0000001f, 0x00000010,
452 	0x88d4, 0x0000001f, 0x00000010,
453 	0x88d4, 0x0000001f, 0x00000010,
454 	0x15c0, 0x000c0fc0, 0x000c0400
455 };
456 
457 static const u32 oland_golden_rlc_registers[] =
458 {
459 	0xc424, 0xffffffff, 0x00601005,
460 	0xc47c, 0xffffffff, 0x10104040,
461 	0xc488, 0xffffffff, 0x0100000a,
462 	0xc314, 0xffffffff, 0x00000800,
463 	0xc30c, 0xffffffff, 0x800000f4
464 };
465 
466 static const u32 oland_golden_registers[] =
467 {
468 	0x9a10, 0x00010000, 0x00018208,
469 	0x9830, 0xffffffff, 0x00000000,
470 	0x9834, 0xf00fffff, 0x00000400,
471 	0x9838, 0x0002021c, 0x00020200,
472 	0xc78, 0x00000080, 0x00000000,
473 	0xd030, 0x000300c0, 0x00800040,
474 	0xd830, 0x000300c0, 0x00800040,
475 	0x5bb0, 0x000000f0, 0x00000070,
476 	0x5bc0, 0x00200000, 0x50100000,
477 	0x7030, 0x31000311, 0x00000011,
478 	0x2ae4, 0x00073ffe, 0x000022a2,
479 	0x240c, 0x000007ff, 0x00000000,
480 	0x8a14, 0xf000001f, 0x00000007,
481 	0x8b24, 0xffffffff, 0x00ffffff,
482 	0x8b10, 0x0000ff0f, 0x00000000,
483 	0x28a4c, 0x07ffffff, 0x4e000000,
484 	0x28350, 0x3f3f3fff, 0x00000082,
485 	0x30, 0x000000ff, 0x0040,
486 	0x34, 0x00000040, 0x00004040,
487 	0x9100, 0x07ffffff, 0x03000000,
488 	0x9060, 0x0000007f, 0x00000020,
489 	0x9508, 0x00010000, 0x00010000,
490 	0xac14, 0x000003ff, 0x000000f3,
491 	0xac10, 0xffffffff, 0x00000000,
492 	0xac0c, 0xffffffff, 0x00003210,
493 	0x88d4, 0x0000001f, 0x00000010,
494 	0x15c0, 0x000c0fc0, 0x000c0400
495 };
496 
497 static const u32 hainan_golden_registers[] =
498 {
499 	0x9a10, 0x00010000, 0x00018208,
500 	0x9830, 0xffffffff, 0x00000000,
501 	0x9834, 0xf00fffff, 0x00000400,
502 	0x9838, 0x0002021c, 0x00020200,
503 	0xd0c0, 0xff000fff, 0x00000100,
504 	0xd030, 0x000300c0, 0x00800040,
505 	0xd8c0, 0xff000fff, 0x00000100,
506 	0xd830, 0x000300c0, 0x00800040,
507 	0x2ae4, 0x00073ffe, 0x000022a2,
508 	0x240c, 0x000007ff, 0x00000000,
509 	0x8a14, 0xf000001f, 0x00000007,
510 	0x8b24, 0xffffffff, 0x00ffffff,
511 	0x8b10, 0x0000ff0f, 0x00000000,
512 	0x28a4c, 0x07ffffff, 0x4e000000,
513 	0x28350, 0x3f3f3fff, 0x00000000,
514 	0x30, 0x000000ff, 0x0040,
515 	0x34, 0x00000040, 0x00004040,
516 	0x9100, 0x03e00000, 0x03600000,
517 	0x9060, 0x0000007f, 0x00000020,
518 	0x9508, 0x00010000, 0x00010000,
519 	0xac14, 0x000003ff, 0x000000f1,
520 	0xac10, 0xffffffff, 0x00000000,
521 	0xac0c, 0xffffffff, 0x00003210,
522 	0x88d4, 0x0000001f, 0x00000010,
523 	0x15c0, 0x000c0fc0, 0x000c0400
524 };
525 
526 static const u32 hainan_golden_registers2[] =
527 {
528 	0x98f8, 0xffffffff, 0x02010001
529 };
530 
531 static const u32 tahiti_mgcg_cgcg_init[] =
532 {
533 	0xc400, 0xffffffff, 0xfffffffc,
534 	0x802c, 0xffffffff, 0xe0000000,
535 	0x9a60, 0xffffffff, 0x00000100,
536 	0x92a4, 0xffffffff, 0x00000100,
537 	0xc164, 0xffffffff, 0x00000100,
538 	0x9774, 0xffffffff, 0x00000100,
539 	0x8984, 0xffffffff, 0x06000100,
540 	0x8a18, 0xffffffff, 0x00000100,
541 	0x92a0, 0xffffffff, 0x00000100,
542 	0xc380, 0xffffffff, 0x00000100,
543 	0x8b28, 0xffffffff, 0x00000100,
544 	0x9144, 0xffffffff, 0x00000100,
545 	0x8d88, 0xffffffff, 0x00000100,
546 	0x8d8c, 0xffffffff, 0x00000100,
547 	0x9030, 0xffffffff, 0x00000100,
548 	0x9034, 0xffffffff, 0x00000100,
549 	0x9038, 0xffffffff, 0x00000100,
550 	0x903c, 0xffffffff, 0x00000100,
551 	0xad80, 0xffffffff, 0x00000100,
552 	0xac54, 0xffffffff, 0x00000100,
553 	0x897c, 0xffffffff, 0x06000100,
554 	0x9868, 0xffffffff, 0x00000100,
555 	0x9510, 0xffffffff, 0x00000100,
556 	0xaf04, 0xffffffff, 0x00000100,
557 	0xae04, 0xffffffff, 0x00000100,
558 	0x949c, 0xffffffff, 0x00000100,
559 	0x802c, 0xffffffff, 0xe0000000,
560 	0x9160, 0xffffffff, 0x00010000,
561 	0x9164, 0xffffffff, 0x00030002,
562 	0x9168, 0xffffffff, 0x00040007,
563 	0x916c, 0xffffffff, 0x00060005,
564 	0x9170, 0xffffffff, 0x00090008,
565 	0x9174, 0xffffffff, 0x00020001,
566 	0x9178, 0xffffffff, 0x00040003,
567 	0x917c, 0xffffffff, 0x00000007,
568 	0x9180, 0xffffffff, 0x00060005,
569 	0x9184, 0xffffffff, 0x00090008,
570 	0x9188, 0xffffffff, 0x00030002,
571 	0x918c, 0xffffffff, 0x00050004,
572 	0x9190, 0xffffffff, 0x00000008,
573 	0x9194, 0xffffffff, 0x00070006,
574 	0x9198, 0xffffffff, 0x000a0009,
575 	0x919c, 0xffffffff, 0x00040003,
576 	0x91a0, 0xffffffff, 0x00060005,
577 	0x91a4, 0xffffffff, 0x00000009,
578 	0x91a8, 0xffffffff, 0x00080007,
579 	0x91ac, 0xffffffff, 0x000b000a,
580 	0x91b0, 0xffffffff, 0x00050004,
581 	0x91b4, 0xffffffff, 0x00070006,
582 	0x91b8, 0xffffffff, 0x0008000b,
583 	0x91bc, 0xffffffff, 0x000a0009,
584 	0x91c0, 0xffffffff, 0x000d000c,
585 	0x91c4, 0xffffffff, 0x00060005,
586 	0x91c8, 0xffffffff, 0x00080007,
587 	0x91cc, 0xffffffff, 0x0000000b,
588 	0x91d0, 0xffffffff, 0x000a0009,
589 	0x91d4, 0xffffffff, 0x000d000c,
590 	0x91d8, 0xffffffff, 0x00070006,
591 	0x91dc, 0xffffffff, 0x00090008,
592 	0x91e0, 0xffffffff, 0x0000000c,
593 	0x91e4, 0xffffffff, 0x000b000a,
594 	0x91e8, 0xffffffff, 0x000e000d,
595 	0x91ec, 0xffffffff, 0x00080007,
596 	0x91f0, 0xffffffff, 0x000a0009,
597 	0x91f4, 0xffffffff, 0x0000000d,
598 	0x91f8, 0xffffffff, 0x000c000b,
599 	0x91fc, 0xffffffff, 0x000f000e,
600 	0x9200, 0xffffffff, 0x00090008,
601 	0x9204, 0xffffffff, 0x000b000a,
602 	0x9208, 0xffffffff, 0x000c000f,
603 	0x920c, 0xffffffff, 0x000e000d,
604 	0x9210, 0xffffffff, 0x00110010,
605 	0x9214, 0xffffffff, 0x000a0009,
606 	0x9218, 0xffffffff, 0x000c000b,
607 	0x921c, 0xffffffff, 0x0000000f,
608 	0x9220, 0xffffffff, 0x000e000d,
609 	0x9224, 0xffffffff, 0x00110010,
610 	0x9228, 0xffffffff, 0x000b000a,
611 	0x922c, 0xffffffff, 0x000d000c,
612 	0x9230, 0xffffffff, 0x00000010,
613 	0x9234, 0xffffffff, 0x000f000e,
614 	0x9238, 0xffffffff, 0x00120011,
615 	0x923c, 0xffffffff, 0x000c000b,
616 	0x9240, 0xffffffff, 0x000e000d,
617 	0x9244, 0xffffffff, 0x00000011,
618 	0x9248, 0xffffffff, 0x0010000f,
619 	0x924c, 0xffffffff, 0x00130012,
620 	0x9250, 0xffffffff, 0x000d000c,
621 	0x9254, 0xffffffff, 0x000f000e,
622 	0x9258, 0xffffffff, 0x00100013,
623 	0x925c, 0xffffffff, 0x00120011,
624 	0x9260, 0xffffffff, 0x00150014,
625 	0x9264, 0xffffffff, 0x000e000d,
626 	0x9268, 0xffffffff, 0x0010000f,
627 	0x926c, 0xffffffff, 0x00000013,
628 	0x9270, 0xffffffff, 0x00120011,
629 	0x9274, 0xffffffff, 0x00150014,
630 	0x9278, 0xffffffff, 0x000f000e,
631 	0x927c, 0xffffffff, 0x00110010,
632 	0x9280, 0xffffffff, 0x00000014,
633 	0x9284, 0xffffffff, 0x00130012,
634 	0x9288, 0xffffffff, 0x00160015,
635 	0x928c, 0xffffffff, 0x0010000f,
636 	0x9290, 0xffffffff, 0x00120011,
637 	0x9294, 0xffffffff, 0x00000015,
638 	0x9298, 0xffffffff, 0x00140013,
639 	0x929c, 0xffffffff, 0x00170016,
640 	0x9150, 0xffffffff, 0x96940200,
641 	0x8708, 0xffffffff, 0x00900100,
642 	0xc478, 0xffffffff, 0x00000080,
643 	0xc404, 0xffffffff, 0x0020003f,
644 	0x30, 0xffffffff, 0x0000001c,
645 	0x34, 0x000f0000, 0x000f0000,
646 	0x160c, 0xffffffff, 0x00000100,
647 	0x1024, 0xffffffff, 0x00000100,
648 	0x102c, 0x00000101, 0x00000000,
649 	0x20a8, 0xffffffff, 0x00000104,
650 	0x264c, 0x000c0000, 0x000c0000,
651 	0x2648, 0x000c0000, 0x000c0000,
652 	0x55e4, 0xff000fff, 0x00000100,
653 	0x55e8, 0x00000001, 0x00000001,
654 	0x2f50, 0x00000001, 0x00000001,
655 	0x30cc, 0xc0000fff, 0x00000104,
656 	0xc1e4, 0x00000001, 0x00000001,
657 	0xd0c0, 0xfffffff0, 0x00000100,
658 	0xd8c0, 0xfffffff0, 0x00000100
659 };
660 
661 static const u32 pitcairn_mgcg_cgcg_init[] =
662 {
663 	0xc400, 0xffffffff, 0xfffffffc,
664 	0x802c, 0xffffffff, 0xe0000000,
665 	0x9a60, 0xffffffff, 0x00000100,
666 	0x92a4, 0xffffffff, 0x00000100,
667 	0xc164, 0xffffffff, 0x00000100,
668 	0x9774, 0xffffffff, 0x00000100,
669 	0x8984, 0xffffffff, 0x06000100,
670 	0x8a18, 0xffffffff, 0x00000100,
671 	0x92a0, 0xffffffff, 0x00000100,
672 	0xc380, 0xffffffff, 0x00000100,
673 	0x8b28, 0xffffffff, 0x00000100,
674 	0x9144, 0xffffffff, 0x00000100,
675 	0x8d88, 0xffffffff, 0x00000100,
676 	0x8d8c, 0xffffffff, 0x00000100,
677 	0x9030, 0xffffffff, 0x00000100,
678 	0x9034, 0xffffffff, 0x00000100,
679 	0x9038, 0xffffffff, 0x00000100,
680 	0x903c, 0xffffffff, 0x00000100,
681 	0xad80, 0xffffffff, 0x00000100,
682 	0xac54, 0xffffffff, 0x00000100,
683 	0x897c, 0xffffffff, 0x06000100,
684 	0x9868, 0xffffffff, 0x00000100,
685 	0x9510, 0xffffffff, 0x00000100,
686 	0xaf04, 0xffffffff, 0x00000100,
687 	0xae04, 0xffffffff, 0x00000100,
688 	0x949c, 0xffffffff, 0x00000100,
689 	0x802c, 0xffffffff, 0xe0000000,
690 	0x9160, 0xffffffff, 0x00010000,
691 	0x9164, 0xffffffff, 0x00030002,
692 	0x9168, 0xffffffff, 0x00040007,
693 	0x916c, 0xffffffff, 0x00060005,
694 	0x9170, 0xffffffff, 0x00090008,
695 	0x9174, 0xffffffff, 0x00020001,
696 	0x9178, 0xffffffff, 0x00040003,
697 	0x917c, 0xffffffff, 0x00000007,
698 	0x9180, 0xffffffff, 0x00060005,
699 	0x9184, 0xffffffff, 0x00090008,
700 	0x9188, 0xffffffff, 0x00030002,
701 	0x918c, 0xffffffff, 0x00050004,
702 	0x9190, 0xffffffff, 0x00000008,
703 	0x9194, 0xffffffff, 0x00070006,
704 	0x9198, 0xffffffff, 0x000a0009,
705 	0x919c, 0xffffffff, 0x00040003,
706 	0x91a0, 0xffffffff, 0x00060005,
707 	0x91a4, 0xffffffff, 0x00000009,
708 	0x91a8, 0xffffffff, 0x00080007,
709 	0x91ac, 0xffffffff, 0x000b000a,
710 	0x91b0, 0xffffffff, 0x00050004,
711 	0x91b4, 0xffffffff, 0x00070006,
712 	0x91b8, 0xffffffff, 0x0008000b,
713 	0x91bc, 0xffffffff, 0x000a0009,
714 	0x91c0, 0xffffffff, 0x000d000c,
715 	0x9200, 0xffffffff, 0x00090008,
716 	0x9204, 0xffffffff, 0x000b000a,
717 	0x9208, 0xffffffff, 0x000c000f,
718 	0x920c, 0xffffffff, 0x000e000d,
719 	0x9210, 0xffffffff, 0x00110010,
720 	0x9214, 0xffffffff, 0x000a0009,
721 	0x9218, 0xffffffff, 0x000c000b,
722 	0x921c, 0xffffffff, 0x0000000f,
723 	0x9220, 0xffffffff, 0x000e000d,
724 	0x9224, 0xffffffff, 0x00110010,
725 	0x9228, 0xffffffff, 0x000b000a,
726 	0x922c, 0xffffffff, 0x000d000c,
727 	0x9230, 0xffffffff, 0x00000010,
728 	0x9234, 0xffffffff, 0x000f000e,
729 	0x9238, 0xffffffff, 0x00120011,
730 	0x923c, 0xffffffff, 0x000c000b,
731 	0x9240, 0xffffffff, 0x000e000d,
732 	0x9244, 0xffffffff, 0x00000011,
733 	0x9248, 0xffffffff, 0x0010000f,
734 	0x924c, 0xffffffff, 0x00130012,
735 	0x9250, 0xffffffff, 0x000d000c,
736 	0x9254, 0xffffffff, 0x000f000e,
737 	0x9258, 0xffffffff, 0x00100013,
738 	0x925c, 0xffffffff, 0x00120011,
739 	0x9260, 0xffffffff, 0x00150014,
740 	0x9150, 0xffffffff, 0x96940200,
741 	0x8708, 0xffffffff, 0x00900100,
742 	0xc478, 0xffffffff, 0x00000080,
743 	0xc404, 0xffffffff, 0x0020003f,
744 	0x30, 0xffffffff, 0x0000001c,
745 	0x34, 0x000f0000, 0x000f0000,
746 	0x160c, 0xffffffff, 0x00000100,
747 	0x1024, 0xffffffff, 0x00000100,
748 	0x102c, 0x00000101, 0x00000000,
749 	0x20a8, 0xffffffff, 0x00000104,
750 	0x55e4, 0xff000fff, 0x00000100,
751 	0x55e8, 0x00000001, 0x00000001,
752 	0x2f50, 0x00000001, 0x00000001,
753 	0x30cc, 0xc0000fff, 0x00000104,
754 	0xc1e4, 0x00000001, 0x00000001,
755 	0xd0c0, 0xfffffff0, 0x00000100,
756 	0xd8c0, 0xfffffff0, 0x00000100
757 };
758 
759 static const u32 verde_mgcg_cgcg_init[] =
760 {
761 	0xc400, 0xffffffff, 0xfffffffc,
762 	0x802c, 0xffffffff, 0xe0000000,
763 	0x9a60, 0xffffffff, 0x00000100,
764 	0x92a4, 0xffffffff, 0x00000100,
765 	0xc164, 0xffffffff, 0x00000100,
766 	0x9774, 0xffffffff, 0x00000100,
767 	0x8984, 0xffffffff, 0x06000100,
768 	0x8a18, 0xffffffff, 0x00000100,
769 	0x92a0, 0xffffffff, 0x00000100,
770 	0xc380, 0xffffffff, 0x00000100,
771 	0x8b28, 0xffffffff, 0x00000100,
772 	0x9144, 0xffffffff, 0x00000100,
773 	0x8d88, 0xffffffff, 0x00000100,
774 	0x8d8c, 0xffffffff, 0x00000100,
775 	0x9030, 0xffffffff, 0x00000100,
776 	0x9034, 0xffffffff, 0x00000100,
777 	0x9038, 0xffffffff, 0x00000100,
778 	0x903c, 0xffffffff, 0x00000100,
779 	0xad80, 0xffffffff, 0x00000100,
780 	0xac54, 0xffffffff, 0x00000100,
781 	0x897c, 0xffffffff, 0x06000100,
782 	0x9868, 0xffffffff, 0x00000100,
783 	0x9510, 0xffffffff, 0x00000100,
784 	0xaf04, 0xffffffff, 0x00000100,
785 	0xae04, 0xffffffff, 0x00000100,
786 	0x949c, 0xffffffff, 0x00000100,
787 	0x802c, 0xffffffff, 0xe0000000,
788 	0x9160, 0xffffffff, 0x00010000,
789 	0x9164, 0xffffffff, 0x00030002,
790 	0x9168, 0xffffffff, 0x00040007,
791 	0x916c, 0xffffffff, 0x00060005,
792 	0x9170, 0xffffffff, 0x00090008,
793 	0x9174, 0xffffffff, 0x00020001,
794 	0x9178, 0xffffffff, 0x00040003,
795 	0x917c, 0xffffffff, 0x00000007,
796 	0x9180, 0xffffffff, 0x00060005,
797 	0x9184, 0xffffffff, 0x00090008,
798 	0x9188, 0xffffffff, 0x00030002,
799 	0x918c, 0xffffffff, 0x00050004,
800 	0x9190, 0xffffffff, 0x00000008,
801 	0x9194, 0xffffffff, 0x00070006,
802 	0x9198, 0xffffffff, 0x000a0009,
803 	0x919c, 0xffffffff, 0x00040003,
804 	0x91a0, 0xffffffff, 0x00060005,
805 	0x91a4, 0xffffffff, 0x00000009,
806 	0x91a8, 0xffffffff, 0x00080007,
807 	0x91ac, 0xffffffff, 0x000b000a,
808 	0x91b0, 0xffffffff, 0x00050004,
809 	0x91b4, 0xffffffff, 0x00070006,
810 	0x91b8, 0xffffffff, 0x0008000b,
811 	0x91bc, 0xffffffff, 0x000a0009,
812 	0x91c0, 0xffffffff, 0x000d000c,
813 	0x9200, 0xffffffff, 0x00090008,
814 	0x9204, 0xffffffff, 0x000b000a,
815 	0x9208, 0xffffffff, 0x000c000f,
816 	0x920c, 0xffffffff, 0x000e000d,
817 	0x9210, 0xffffffff, 0x00110010,
818 	0x9214, 0xffffffff, 0x000a0009,
819 	0x9218, 0xffffffff, 0x000c000b,
820 	0x921c, 0xffffffff, 0x0000000f,
821 	0x9220, 0xffffffff, 0x000e000d,
822 	0x9224, 0xffffffff, 0x00110010,
823 	0x9228, 0xffffffff, 0x000b000a,
824 	0x922c, 0xffffffff, 0x000d000c,
825 	0x9230, 0xffffffff, 0x00000010,
826 	0x9234, 0xffffffff, 0x000f000e,
827 	0x9238, 0xffffffff, 0x00120011,
828 	0x923c, 0xffffffff, 0x000c000b,
829 	0x9240, 0xffffffff, 0x000e000d,
830 	0x9244, 0xffffffff, 0x00000011,
831 	0x9248, 0xffffffff, 0x0010000f,
832 	0x924c, 0xffffffff, 0x00130012,
833 	0x9250, 0xffffffff, 0x000d000c,
834 	0x9254, 0xffffffff, 0x000f000e,
835 	0x9258, 0xffffffff, 0x00100013,
836 	0x925c, 0xffffffff, 0x00120011,
837 	0x9260, 0xffffffff, 0x00150014,
838 	0x9150, 0xffffffff, 0x96940200,
839 	0x8708, 0xffffffff, 0x00900100,
840 	0xc478, 0xffffffff, 0x00000080,
841 	0xc404, 0xffffffff, 0x0020003f,
842 	0x30, 0xffffffff, 0x0000001c,
843 	0x34, 0x000f0000, 0x000f0000,
844 	0x160c, 0xffffffff, 0x00000100,
845 	0x1024, 0xffffffff, 0x00000100,
846 	0x102c, 0x00000101, 0x00000000,
847 	0x20a8, 0xffffffff, 0x00000104,
848 	0x264c, 0x000c0000, 0x000c0000,
849 	0x2648, 0x000c0000, 0x000c0000,
850 	0x55e4, 0xff000fff, 0x00000100,
851 	0x55e8, 0x00000001, 0x00000001,
852 	0x2f50, 0x00000001, 0x00000001,
853 	0x30cc, 0xc0000fff, 0x00000104,
854 	0xc1e4, 0x00000001, 0x00000001,
855 	0xd0c0, 0xfffffff0, 0x00000100,
856 	0xd8c0, 0xfffffff0, 0x00000100
857 };
858 
859 static const u32 oland_mgcg_cgcg_init[] =
860 {
861 	0xc400, 0xffffffff, 0xfffffffc,
862 	0x802c, 0xffffffff, 0xe0000000,
863 	0x9a60, 0xffffffff, 0x00000100,
864 	0x92a4, 0xffffffff, 0x00000100,
865 	0xc164, 0xffffffff, 0x00000100,
866 	0x9774, 0xffffffff, 0x00000100,
867 	0x8984, 0xffffffff, 0x06000100,
868 	0x8a18, 0xffffffff, 0x00000100,
869 	0x92a0, 0xffffffff, 0x00000100,
870 	0xc380, 0xffffffff, 0x00000100,
871 	0x8b28, 0xffffffff, 0x00000100,
872 	0x9144, 0xffffffff, 0x00000100,
873 	0x8d88, 0xffffffff, 0x00000100,
874 	0x8d8c, 0xffffffff, 0x00000100,
875 	0x9030, 0xffffffff, 0x00000100,
876 	0x9034, 0xffffffff, 0x00000100,
877 	0x9038, 0xffffffff, 0x00000100,
878 	0x903c, 0xffffffff, 0x00000100,
879 	0xad80, 0xffffffff, 0x00000100,
880 	0xac54, 0xffffffff, 0x00000100,
881 	0x897c, 0xffffffff, 0x06000100,
882 	0x9868, 0xffffffff, 0x00000100,
883 	0x9510, 0xffffffff, 0x00000100,
884 	0xaf04, 0xffffffff, 0x00000100,
885 	0xae04, 0xffffffff, 0x00000100,
886 	0x949c, 0xffffffff, 0x00000100,
887 	0x802c, 0xffffffff, 0xe0000000,
888 	0x9160, 0xffffffff, 0x00010000,
889 	0x9164, 0xffffffff, 0x00030002,
890 	0x9168, 0xffffffff, 0x00040007,
891 	0x916c, 0xffffffff, 0x00060005,
892 	0x9170, 0xffffffff, 0x00090008,
893 	0x9174, 0xffffffff, 0x00020001,
894 	0x9178, 0xffffffff, 0x00040003,
895 	0x917c, 0xffffffff, 0x00000007,
896 	0x9180, 0xffffffff, 0x00060005,
897 	0x9184, 0xffffffff, 0x00090008,
898 	0x9188, 0xffffffff, 0x00030002,
899 	0x918c, 0xffffffff, 0x00050004,
900 	0x9190, 0xffffffff, 0x00000008,
901 	0x9194, 0xffffffff, 0x00070006,
902 	0x9198, 0xffffffff, 0x000a0009,
903 	0x919c, 0xffffffff, 0x00040003,
904 	0x91a0, 0xffffffff, 0x00060005,
905 	0x91a4, 0xffffffff, 0x00000009,
906 	0x91a8, 0xffffffff, 0x00080007,
907 	0x91ac, 0xffffffff, 0x000b000a,
908 	0x91b0, 0xffffffff, 0x00050004,
909 	0x91b4, 0xffffffff, 0x00070006,
910 	0x91b8, 0xffffffff, 0x0008000b,
911 	0x91bc, 0xffffffff, 0x000a0009,
912 	0x91c0, 0xffffffff, 0x000d000c,
913 	0x91c4, 0xffffffff, 0x00060005,
914 	0x91c8, 0xffffffff, 0x00080007,
915 	0x91cc, 0xffffffff, 0x0000000b,
916 	0x91d0, 0xffffffff, 0x000a0009,
917 	0x91d4, 0xffffffff, 0x000d000c,
918 	0x9150, 0xffffffff, 0x96940200,
919 	0x8708, 0xffffffff, 0x00900100,
920 	0xc478, 0xffffffff, 0x00000080,
921 	0xc404, 0xffffffff, 0x0020003f,
922 	0x30, 0xffffffff, 0x0000001c,
923 	0x34, 0x000f0000, 0x000f0000,
924 	0x160c, 0xffffffff, 0x00000100,
925 	0x1024, 0xffffffff, 0x00000100,
926 	0x102c, 0x00000101, 0x00000000,
927 	0x20a8, 0xffffffff, 0x00000104,
928 	0x264c, 0x000c0000, 0x000c0000,
929 	0x2648, 0x000c0000, 0x000c0000,
930 	0x55e4, 0xff000fff, 0x00000100,
931 	0x55e8, 0x00000001, 0x00000001,
932 	0x2f50, 0x00000001, 0x00000001,
933 	0x30cc, 0xc0000fff, 0x00000104,
934 	0xc1e4, 0x00000001, 0x00000001,
935 	0xd0c0, 0xfffffff0, 0x00000100,
936 	0xd8c0, 0xfffffff0, 0x00000100
937 };
938 
939 static const u32 hainan_mgcg_cgcg_init[] =
940 {
941 	0xc400, 0xffffffff, 0xfffffffc,
942 	0x802c, 0xffffffff, 0xe0000000,
943 	0x9a60, 0xffffffff, 0x00000100,
944 	0x92a4, 0xffffffff, 0x00000100,
945 	0xc164, 0xffffffff, 0x00000100,
946 	0x9774, 0xffffffff, 0x00000100,
947 	0x8984, 0xffffffff, 0x06000100,
948 	0x8a18, 0xffffffff, 0x00000100,
949 	0x92a0, 0xffffffff, 0x00000100,
950 	0xc380, 0xffffffff, 0x00000100,
951 	0x8b28, 0xffffffff, 0x00000100,
952 	0x9144, 0xffffffff, 0x00000100,
953 	0x8d88, 0xffffffff, 0x00000100,
954 	0x8d8c, 0xffffffff, 0x00000100,
955 	0x9030, 0xffffffff, 0x00000100,
956 	0x9034, 0xffffffff, 0x00000100,
957 	0x9038, 0xffffffff, 0x00000100,
958 	0x903c, 0xffffffff, 0x00000100,
959 	0xad80, 0xffffffff, 0x00000100,
960 	0xac54, 0xffffffff, 0x00000100,
961 	0x897c, 0xffffffff, 0x06000100,
962 	0x9868, 0xffffffff, 0x00000100,
963 	0x9510, 0xffffffff, 0x00000100,
964 	0xaf04, 0xffffffff, 0x00000100,
965 	0xae04, 0xffffffff, 0x00000100,
966 	0x949c, 0xffffffff, 0x00000100,
967 	0x802c, 0xffffffff, 0xe0000000,
968 	0x9160, 0xffffffff, 0x00010000,
969 	0x9164, 0xffffffff, 0x00030002,
970 	0x9168, 0xffffffff, 0x00040007,
971 	0x916c, 0xffffffff, 0x00060005,
972 	0x9170, 0xffffffff, 0x00090008,
973 	0x9174, 0xffffffff, 0x00020001,
974 	0x9178, 0xffffffff, 0x00040003,
975 	0x917c, 0xffffffff, 0x00000007,
976 	0x9180, 0xffffffff, 0x00060005,
977 	0x9184, 0xffffffff, 0x00090008,
978 	0x9188, 0xffffffff, 0x00030002,
979 	0x918c, 0xffffffff, 0x00050004,
980 	0x9190, 0xffffffff, 0x00000008,
981 	0x9194, 0xffffffff, 0x00070006,
982 	0x9198, 0xffffffff, 0x000a0009,
983 	0x919c, 0xffffffff, 0x00040003,
984 	0x91a0, 0xffffffff, 0x00060005,
985 	0x91a4, 0xffffffff, 0x00000009,
986 	0x91a8, 0xffffffff, 0x00080007,
987 	0x91ac, 0xffffffff, 0x000b000a,
988 	0x91b0, 0xffffffff, 0x00050004,
989 	0x91b4, 0xffffffff, 0x00070006,
990 	0x91b8, 0xffffffff, 0x0008000b,
991 	0x91bc, 0xffffffff, 0x000a0009,
992 	0x91c0, 0xffffffff, 0x000d000c,
993 	0x91c4, 0xffffffff, 0x00060005,
994 	0x91c8, 0xffffffff, 0x00080007,
995 	0x91cc, 0xffffffff, 0x0000000b,
996 	0x91d0, 0xffffffff, 0x000a0009,
997 	0x91d4, 0xffffffff, 0x000d000c,
998 	0x9150, 0xffffffff, 0x96940200,
999 	0x8708, 0xffffffff, 0x00900100,
1000 	0xc478, 0xffffffff, 0x00000080,
1001 	0xc404, 0xffffffff, 0x0020003f,
1002 	0x30, 0xffffffff, 0x0000001c,
1003 	0x34, 0x000f0000, 0x000f0000,
1004 	0x160c, 0xffffffff, 0x00000100,
1005 	0x1024, 0xffffffff, 0x00000100,
1006 	0x20a8, 0xffffffff, 0x00000104,
1007 	0x264c, 0x000c0000, 0x000c0000,
1008 	0x2648, 0x000c0000, 0x000c0000,
1009 	0x2f50, 0x00000001, 0x00000001,
1010 	0x30cc, 0xc0000fff, 0x00000104,
1011 	0xc1e4, 0x00000001, 0x00000001,
1012 	0xd0c0, 0xfffffff0, 0x00000100,
1013 	0xd8c0, 0xfffffff0, 0x00000100
1014 };
1015 
1016 static const u32 verde_pg_init[] =
1017 {
1018 	0x353c, 0xffffffff, 0x40000,
1019 	0x3538, 0xffffffff, 0x200010ff,
1020 	0x353c, 0xffffffff, 0x0,
1021 	0x353c, 0xffffffff, 0x0,
1022 	0x353c, 0xffffffff, 0x0,
1023 	0x353c, 0xffffffff, 0x0,
1024 	0x353c, 0xffffffff, 0x0,
1025 	0x353c, 0xffffffff, 0x7007,
1026 	0x3538, 0xffffffff, 0x300010ff,
1027 	0x353c, 0xffffffff, 0x0,
1028 	0x353c, 0xffffffff, 0x0,
1029 	0x353c, 0xffffffff, 0x0,
1030 	0x353c, 0xffffffff, 0x0,
1031 	0x353c, 0xffffffff, 0x0,
1032 	0x353c, 0xffffffff, 0x400000,
1033 	0x3538, 0xffffffff, 0x100010ff,
1034 	0x353c, 0xffffffff, 0x0,
1035 	0x353c, 0xffffffff, 0x0,
1036 	0x353c, 0xffffffff, 0x0,
1037 	0x353c, 0xffffffff, 0x0,
1038 	0x353c, 0xffffffff, 0x0,
1039 	0x353c, 0xffffffff, 0x120200,
1040 	0x3538, 0xffffffff, 0x500010ff,
1041 	0x353c, 0xffffffff, 0x0,
1042 	0x353c, 0xffffffff, 0x0,
1043 	0x353c, 0xffffffff, 0x0,
1044 	0x353c, 0xffffffff, 0x0,
1045 	0x353c, 0xffffffff, 0x0,
1046 	0x353c, 0xffffffff, 0x1e1e16,
1047 	0x3538, 0xffffffff, 0x600010ff,
1048 	0x353c, 0xffffffff, 0x0,
1049 	0x353c, 0xffffffff, 0x0,
1050 	0x353c, 0xffffffff, 0x0,
1051 	0x353c, 0xffffffff, 0x0,
1052 	0x353c, 0xffffffff, 0x0,
1053 	0x353c, 0xffffffff, 0x171f1e,
1054 	0x3538, 0xffffffff, 0x700010ff,
1055 	0x353c, 0xffffffff, 0x0,
1056 	0x353c, 0xffffffff, 0x0,
1057 	0x353c, 0xffffffff, 0x0,
1058 	0x353c, 0xffffffff, 0x0,
1059 	0x353c, 0xffffffff, 0x0,
1060 	0x353c, 0xffffffff, 0x0,
1061 	0x3538, 0xffffffff, 0x9ff,
1062 	0x3500, 0xffffffff, 0x0,
1063 	0x3504, 0xffffffff, 0x10000800,
1064 	0x3504, 0xffffffff, 0xf,
1065 	0x3504, 0xffffffff, 0xf,
1066 	0x3500, 0xffffffff, 0x4,
1067 	0x3504, 0xffffffff, 0x1000051e,
1068 	0x3504, 0xffffffff, 0xffff,
1069 	0x3504, 0xffffffff, 0xffff,
1070 	0x3500, 0xffffffff, 0x8,
1071 	0x3504, 0xffffffff, 0x80500,
1072 	0x3500, 0xffffffff, 0x12,
1073 	0x3504, 0xffffffff, 0x9050c,
1074 	0x3500, 0xffffffff, 0x1d,
1075 	0x3504, 0xffffffff, 0xb052c,
1076 	0x3500, 0xffffffff, 0x2a,
1077 	0x3504, 0xffffffff, 0x1053e,
1078 	0x3500, 0xffffffff, 0x2d,
1079 	0x3504, 0xffffffff, 0x10546,
1080 	0x3500, 0xffffffff, 0x30,
1081 	0x3504, 0xffffffff, 0xa054e,
1082 	0x3500, 0xffffffff, 0x3c,
1083 	0x3504, 0xffffffff, 0x1055f,
1084 	0x3500, 0xffffffff, 0x3f,
1085 	0x3504, 0xffffffff, 0x10567,
1086 	0x3500, 0xffffffff, 0x42,
1087 	0x3504, 0xffffffff, 0x1056f,
1088 	0x3500, 0xffffffff, 0x45,
1089 	0x3504, 0xffffffff, 0x10572,
1090 	0x3500, 0xffffffff, 0x48,
1091 	0x3504, 0xffffffff, 0x20575,
1092 	0x3500, 0xffffffff, 0x4c,
1093 	0x3504, 0xffffffff, 0x190801,
1094 	0x3500, 0xffffffff, 0x67,
1095 	0x3504, 0xffffffff, 0x1082a,
1096 	0x3500, 0xffffffff, 0x6a,
1097 	0x3504, 0xffffffff, 0x1b082d,
1098 	0x3500, 0xffffffff, 0x87,
1099 	0x3504, 0xffffffff, 0x310851,
1100 	0x3500, 0xffffffff, 0xba,
1101 	0x3504, 0xffffffff, 0x891,
1102 	0x3500, 0xffffffff, 0xbc,
1103 	0x3504, 0xffffffff, 0x893,
1104 	0x3500, 0xffffffff, 0xbe,
1105 	0x3504, 0xffffffff, 0x20895,
1106 	0x3500, 0xffffffff, 0xc2,
1107 	0x3504, 0xffffffff, 0x20899,
1108 	0x3500, 0xffffffff, 0xc6,
1109 	0x3504, 0xffffffff, 0x2089d,
1110 	0x3500, 0xffffffff, 0xca,
1111 	0x3504, 0xffffffff, 0x8a1,
1112 	0x3500, 0xffffffff, 0xcc,
1113 	0x3504, 0xffffffff, 0x8a3,
1114 	0x3500, 0xffffffff, 0xce,
1115 	0x3504, 0xffffffff, 0x308a5,
1116 	0x3500, 0xffffffff, 0xd3,
1117 	0x3504, 0xffffffff, 0x6d08cd,
1118 	0x3500, 0xffffffff, 0x142,
1119 	0x3504, 0xffffffff, 0x2000095a,
1120 	0x3504, 0xffffffff, 0x1,
1121 	0x3500, 0xffffffff, 0x144,
1122 	0x3504, 0xffffffff, 0x301f095b,
1123 	0x3500, 0xffffffff, 0x165,
1124 	0x3504, 0xffffffff, 0xc094d,
1125 	0x3500, 0xffffffff, 0x173,
1126 	0x3504, 0xffffffff, 0xf096d,
1127 	0x3500, 0xffffffff, 0x184,
1128 	0x3504, 0xffffffff, 0x15097f,
1129 	0x3500, 0xffffffff, 0x19b,
1130 	0x3504, 0xffffffff, 0xc0998,
1131 	0x3500, 0xffffffff, 0x1a9,
1132 	0x3504, 0xffffffff, 0x409a7,
1133 	0x3500, 0xffffffff, 0x1af,
1134 	0x3504, 0xffffffff, 0xcdc,
1135 	0x3500, 0xffffffff, 0x1b1,
1136 	0x3504, 0xffffffff, 0x800,
1137 	0x3508, 0xffffffff, 0x6c9b2000,
1138 	0x3510, 0xfc00, 0x2000,
1139 	0x3544, 0xffffffff, 0xfc0,
1140 	0x28d4, 0x00000100, 0x100
1141 };
1142 
1143 static void si_init_golden_registers(struct radeon_device *rdev)
1144 {
1145 	switch (rdev->family) {
1146 	case CHIP_TAHITI:
1147 		radeon_program_register_sequence(rdev,
1148 						 tahiti_golden_registers,
1149 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1150 		radeon_program_register_sequence(rdev,
1151 						 tahiti_golden_rlc_registers,
1152 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1153 		radeon_program_register_sequence(rdev,
1154 						 tahiti_mgcg_cgcg_init,
1155 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1156 		radeon_program_register_sequence(rdev,
1157 						 tahiti_golden_registers2,
1158 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1159 		break;
1160 	case CHIP_PITCAIRN:
1161 		radeon_program_register_sequence(rdev,
1162 						 pitcairn_golden_registers,
1163 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1164 		radeon_program_register_sequence(rdev,
1165 						 pitcairn_golden_rlc_registers,
1166 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1167 		radeon_program_register_sequence(rdev,
1168 						 pitcairn_mgcg_cgcg_init,
1169 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1170 		break;
1171 	case CHIP_VERDE:
1172 		radeon_program_register_sequence(rdev,
1173 						 verde_golden_registers,
1174 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1175 		radeon_program_register_sequence(rdev,
1176 						 verde_golden_rlc_registers,
1177 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1178 		radeon_program_register_sequence(rdev,
1179 						 verde_mgcg_cgcg_init,
1180 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1181 		radeon_program_register_sequence(rdev,
1182 						 verde_pg_init,
1183 						 (const u32)ARRAY_SIZE(verde_pg_init));
1184 		break;
1185 	case CHIP_OLAND:
1186 		radeon_program_register_sequence(rdev,
1187 						 oland_golden_registers,
1188 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1189 		radeon_program_register_sequence(rdev,
1190 						 oland_golden_rlc_registers,
1191 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1192 		radeon_program_register_sequence(rdev,
1193 						 oland_mgcg_cgcg_init,
1194 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1195 		break;
1196 	case CHIP_HAINAN:
1197 		radeon_program_register_sequence(rdev,
1198 						 hainan_golden_registers,
1199 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1200 		radeon_program_register_sequence(rdev,
1201 						 hainan_golden_registers2,
1202 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1203 		radeon_program_register_sequence(rdev,
1204 						 hainan_mgcg_cgcg_init,
1205 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1206 		break;
1207 	default:
1208 		break;
1209 	}
1210 }
1211 
1212 #define PCIE_BUS_CLK                10000
1213 #define TCLK                        (PCIE_BUS_CLK / 10)
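/*
 * Clock values here follow the driver's 10 kHz convention (the unit of
 * rdev->clock.spll.reference_freq), so PCIE_BUS_CLK corresponds to
 * 100 MHz and TCLK to 10 MHz -- an inference from usage, not a datasheet
 * figure.
 */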
1214 
1215 /**
1216  * si_get_xclk - get the xclk
1217  *
1218  * @rdev: radeon_device pointer
1219  *
1220  * Returns the reference clock used by the gfx engine
1221  * (SI).
1222  */
1223 u32 si_get_xclk(struct radeon_device *rdev)
1224 {
1225 	u32 reference_clock = rdev->clock.spll.reference_freq;
1226 	u32 tmp;
1227 
1228 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1229 	if (tmp & MUX_TCLK_TO_XCLK)
1230 		return TCLK;
1231 
1232 	tmp = RREG32(CG_CLKPIN_CNTL);
1233 	if (tmp & XTALIN_DIVIDE)
1234 		return reference_clock / 4;
1235 
1236 	return reference_clock;
1237 }
1238 
1239 /* get temperature in millidegrees */
1240 int si_get_temp(struct radeon_device *rdev)
1241 {
1242 	u32 temp;
1243 	int actual_temp = 0;
1244 
1245 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1246 		CTF_TEMP_SHIFT;
1247 
1248 	if (temp & 0x200)
1249 		actual_temp = 255;
1250 	else
1251 		actual_temp = temp & 0x1ff;
1252 
1253 	actual_temp = (actual_temp * 1000);
1254 
1255 	return actual_temp;
1256 }
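/*
 * Worked example: a CTF_TEMP field of 0x041 (bit 9 clear) is 65 degrees C
 * and is returned as 65000 millidegrees; any value with bit 9 set is
 * clamped to the 255 C ceiling.
 */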
1257 
1258 #define TAHITI_IO_MC_REGS_SIZE 36
1259 
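/*
 * The *_io_mc_regs tables are { MC_SEQ_IO_DEBUG index, data } pairs,
 * programmed via MC_SEQ_IO_DEBUG_INDEX/MC_SEQ_IO_DEBUG_DATA in
 * si_mc_load_microcode() before the MC ucode itself is uploaded.
 */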
1260 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1261 	{0x0000006f, 0x03044000},
1262 	{0x00000070, 0x0480c018},
1263 	{0x00000071, 0x00000040},
1264 	{0x00000072, 0x01000000},
1265 	{0x00000074, 0x000000ff},
1266 	{0x00000075, 0x00143400},
1267 	{0x00000076, 0x08ec0800},
1268 	{0x00000077, 0x040000cc},
1269 	{0x00000079, 0x00000000},
1270 	{0x0000007a, 0x21000409},
1271 	{0x0000007c, 0x00000000},
1272 	{0x0000007d, 0xe8000000},
1273 	{0x0000007e, 0x044408a8},
1274 	{0x0000007f, 0x00000003},
1275 	{0x00000080, 0x00000000},
1276 	{0x00000081, 0x01000000},
1277 	{0x00000082, 0x02000000},
1278 	{0x00000083, 0x00000000},
1279 	{0x00000084, 0xe3f3e4f4},
1280 	{0x00000085, 0x00052024},
1281 	{0x00000087, 0x00000000},
1282 	{0x00000088, 0x66036603},
1283 	{0x00000089, 0x01000000},
1284 	{0x0000008b, 0x1c0a0000},
1285 	{0x0000008c, 0xff010000},
1286 	{0x0000008e, 0xffffefff},
1287 	{0x0000008f, 0xfff3efff},
1288 	{0x00000090, 0xfff3efbf},
1289 	{0x00000094, 0x00101101},
1290 	{0x00000095, 0x00000fff},
1291 	{0x00000096, 0x00116fff},
1292 	{0x00000097, 0x60010000},
1293 	{0x00000098, 0x10010000},
1294 	{0x00000099, 0x00006000},
1295 	{0x0000009a, 0x00001000},
1296 	{0x0000009f, 0x00a77400}
1297 };
1298 
1299 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1300 	{0x0000006f, 0x03044000},
1301 	{0x00000070, 0x0480c018},
1302 	{0x00000071, 0x00000040},
1303 	{0x00000072, 0x01000000},
1304 	{0x00000074, 0x000000ff},
1305 	{0x00000075, 0x00143400},
1306 	{0x00000076, 0x08ec0800},
1307 	{0x00000077, 0x040000cc},
1308 	{0x00000079, 0x00000000},
1309 	{0x0000007a, 0x21000409},
1310 	{0x0000007c, 0x00000000},
1311 	{0x0000007d, 0xe8000000},
1312 	{0x0000007e, 0x044408a8},
1313 	{0x0000007f, 0x00000003},
1314 	{0x00000080, 0x00000000},
1315 	{0x00000081, 0x01000000},
1316 	{0x00000082, 0x02000000},
1317 	{0x00000083, 0x00000000},
1318 	{0x00000084, 0xe3f3e4f4},
1319 	{0x00000085, 0x00052024},
1320 	{0x00000087, 0x00000000},
1321 	{0x00000088, 0x66036603},
1322 	{0x00000089, 0x01000000},
1323 	{0x0000008b, 0x1c0a0000},
1324 	{0x0000008c, 0xff010000},
1325 	{0x0000008e, 0xffffefff},
1326 	{0x0000008f, 0xfff3efff},
1327 	{0x00000090, 0xfff3efbf},
1328 	{0x00000094, 0x00101101},
1329 	{0x00000095, 0x00000fff},
1330 	{0x00000096, 0x00116fff},
1331 	{0x00000097, 0x60010000},
1332 	{0x00000098, 0x10010000},
1333 	{0x00000099, 0x00006000},
1334 	{0x0000009a, 0x00001000},
1335 	{0x0000009f, 0x00a47400}
1336 };
1337 
1338 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1339 	{0x0000006f, 0x03044000},
1340 	{0x00000070, 0x0480c018},
1341 	{0x00000071, 0x00000040},
1342 	{0x00000072, 0x01000000},
1343 	{0x00000074, 0x000000ff},
1344 	{0x00000075, 0x00143400},
1345 	{0x00000076, 0x08ec0800},
1346 	{0x00000077, 0x040000cc},
1347 	{0x00000079, 0x00000000},
1348 	{0x0000007a, 0x21000409},
1349 	{0x0000007c, 0x00000000},
1350 	{0x0000007d, 0xe8000000},
1351 	{0x0000007e, 0x044408a8},
1352 	{0x0000007f, 0x00000003},
1353 	{0x00000080, 0x00000000},
1354 	{0x00000081, 0x01000000},
1355 	{0x00000082, 0x02000000},
1356 	{0x00000083, 0x00000000},
1357 	{0x00000084, 0xe3f3e4f4},
1358 	{0x00000085, 0x00052024},
1359 	{0x00000087, 0x00000000},
1360 	{0x00000088, 0x66036603},
1361 	{0x00000089, 0x01000000},
1362 	{0x0000008b, 0x1c0a0000},
1363 	{0x0000008c, 0xff010000},
1364 	{0x0000008e, 0xffffefff},
1365 	{0x0000008f, 0xfff3efff},
1366 	{0x00000090, 0xfff3efbf},
1367 	{0x00000094, 0x00101101},
1368 	{0x00000095, 0x00000fff},
1369 	{0x00000096, 0x00116fff},
1370 	{0x00000097, 0x60010000},
1371 	{0x00000098, 0x10010000},
1372 	{0x00000099, 0x00006000},
1373 	{0x0000009a, 0x00001000},
1374 	{0x0000009f, 0x00a37400}
1375 };
1376 
1377 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1378 	{0x0000006f, 0x03044000},
1379 	{0x00000070, 0x0480c018},
1380 	{0x00000071, 0x00000040},
1381 	{0x00000072, 0x01000000},
1382 	{0x00000074, 0x000000ff},
1383 	{0x00000075, 0x00143400},
1384 	{0x00000076, 0x08ec0800},
1385 	{0x00000077, 0x040000cc},
1386 	{0x00000079, 0x00000000},
1387 	{0x0000007a, 0x21000409},
1388 	{0x0000007c, 0x00000000},
1389 	{0x0000007d, 0xe8000000},
1390 	{0x0000007e, 0x044408a8},
1391 	{0x0000007f, 0x00000003},
1392 	{0x00000080, 0x00000000},
1393 	{0x00000081, 0x01000000},
1394 	{0x00000082, 0x02000000},
1395 	{0x00000083, 0x00000000},
1396 	{0x00000084, 0xe3f3e4f4},
1397 	{0x00000085, 0x00052024},
1398 	{0x00000087, 0x00000000},
1399 	{0x00000088, 0x66036603},
1400 	{0x00000089, 0x01000000},
1401 	{0x0000008b, 0x1c0a0000},
1402 	{0x0000008c, 0xff010000},
1403 	{0x0000008e, 0xffffefff},
1404 	{0x0000008f, 0xfff3efff},
1405 	{0x00000090, 0xfff3efbf},
1406 	{0x00000094, 0x00101101},
1407 	{0x00000095, 0x00000fff},
1408 	{0x00000096, 0x00116fff},
1409 	{0x00000097, 0x60010000},
1410 	{0x00000098, 0x10010000},
1411 	{0x00000099, 0x00006000},
1412 	{0x0000009a, 0x00001000},
1413 	{0x0000009f, 0x00a17730}
1414 };
1415 
1416 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1417 	{0x0000006f, 0x03044000},
1418 	{0x00000070, 0x0480c018},
1419 	{0x00000071, 0x00000040},
1420 	{0x00000072, 0x01000000},
1421 	{0x00000074, 0x000000ff},
1422 	{0x00000075, 0x00143400},
1423 	{0x00000076, 0x08ec0800},
1424 	{0x00000077, 0x040000cc},
1425 	{0x00000079, 0x00000000},
1426 	{0x0000007a, 0x21000409},
1427 	{0x0000007c, 0x00000000},
1428 	{0x0000007d, 0xe8000000},
1429 	{0x0000007e, 0x044408a8},
1430 	{0x0000007f, 0x00000003},
1431 	{0x00000080, 0x00000000},
1432 	{0x00000081, 0x01000000},
1433 	{0x00000082, 0x02000000},
1434 	{0x00000083, 0x00000000},
1435 	{0x00000084, 0xe3f3e4f4},
1436 	{0x00000085, 0x00052024},
1437 	{0x00000087, 0x00000000},
1438 	{0x00000088, 0x66036603},
1439 	{0x00000089, 0x01000000},
1440 	{0x0000008b, 0x1c0a0000},
1441 	{0x0000008c, 0xff010000},
1442 	{0x0000008e, 0xffffefff},
1443 	{0x0000008f, 0xfff3efff},
1444 	{0x00000090, 0xfff3efbf},
1445 	{0x00000094, 0x00101101},
1446 	{0x00000095, 0x00000fff},
1447 	{0x00000096, 0x00116fff},
1448 	{0x00000097, 0x60010000},
1449 	{0x00000098, 0x10010000},
1450 	{0x00000099, 0x00006000},
1451 	{0x0000009a, 0x00001000},
1452 	{0x0000009f, 0x00a07730}
1453 };
1454 
1455 /* ucode loading */
1456 static int si_mc_load_microcode(struct radeon_device *rdev)
1457 {
1458 	const __be32 *fw_data;
1459 	u32 running, blackout = 0;
1460 	u32 *io_mc_regs;
1461 	int i, ucode_size, regs_size;
1462 
1463 	if (!rdev->mc_fw)
1464 		return -EINVAL;
1465 
1466 	switch (rdev->family) {
1467 	case CHIP_TAHITI:
1468 		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1469 		ucode_size = SI_MC_UCODE_SIZE;
1470 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1471 		break;
1472 	case CHIP_PITCAIRN:
1473 		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1474 		ucode_size = SI_MC_UCODE_SIZE;
1475 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1476 		break;
1477 	case CHIP_VERDE:
1478 	default:
1479 		io_mc_regs = (u32 *)&verde_io_mc_regs;
1480 		ucode_size = SI_MC_UCODE_SIZE;
1481 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1482 		break;
1483 	case CHIP_OLAND:
1484 		io_mc_regs = (u32 *)&oland_io_mc_regs;
1485 		ucode_size = OLAND_MC_UCODE_SIZE;
1486 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1487 		break;
1488 	case CHIP_HAINAN:
1489 		io_mc_regs = (u32 *)&hainan_io_mc_regs;
1490 		ucode_size = OLAND_MC_UCODE_SIZE;
1491 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1492 		break;
1493 	}
1494 
1495 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1496 
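	/*
	 * The ucode is only loaded while the MC is idle (running == 0), so
	 * the blackout save/restore branches below can never execute.
	 */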
1497 	if (running == 0) {
1498 		if (running) {
1499 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1500 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1501 		}
1502 
1503 		/* reset the engine and set to writable */
1504 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1505 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1506 
1507 		/* load mc io regs */
1508 		for (i = 0; i < regs_size; i++) {
1509 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1510 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1511 		}
1512 		/* load the MC ucode */
1513 		fw_data = (const __be32 *)rdev->mc_fw->data;
1514 		for (i = 0; i < ucode_size; i++)
1515 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1516 
1517 		/* put the engine back into the active state */
1518 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1519 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1520 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1521 
1522 		/* wait for training to complete */
1523 		for (i = 0; i < rdev->usec_timeout; i++) {
1524 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1525 				break;
1526 			udelay(1);
1527 		}
1528 		for (i = 0; i < rdev->usec_timeout; i++) {
1529 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1530 				break;
1531 			udelay(1);
1532 		}
1533 
1534 		if (running)
1535 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1536 	}
1537 
1538 	return 0;
1539 }
1540 
1541 static int si_init_microcode(struct radeon_device *rdev)
1542 {
1543 	const char *chip_name;
1544 	const char *rlc_chip_name;
1545 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1546 	size_t smc_req_size;
1547 	char fw_name[30];
1548 	int err;
1549 
1550 	DRM_DEBUG("\n");
1551 
1552 	switch (rdev->family) {
1553 	case CHIP_TAHITI:
1554 		chip_name = "TAHITI";
1555 		rlc_chip_name = "TAHITI";
1556 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1557 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1558 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1559 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1560 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1561 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1562 		break;
1563 	case CHIP_PITCAIRN:
1564 		chip_name = "PITCAIRN";
1565 		rlc_chip_name = "PITCAIRN";
1566 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1567 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1568 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1569 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1570 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1571 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1572 		break;
1573 	case CHIP_VERDE:
1574 		chip_name = "VERDE";
1575 		rlc_chip_name = "VERDE";
1576 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1577 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1578 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1579 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1580 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1581 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1582 		break;
1583 	case CHIP_OLAND:
1584 		chip_name = "OLAND";
1585 		rlc_chip_name = "OLAND";
1586 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1587 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1588 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1589 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1590 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1591 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1592 		break;
1593 	case CHIP_HAINAN:
1594 		chip_name = "HAINAN";
1595 		rlc_chip_name = "HAINAN";
1596 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1597 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1598 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1599 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1600 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1601 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1602 		break;
1603 	default: BUG();
1604 	}
1605 
1606 	DRM_INFO("Loading %s Microcode\n", chip_name);
1607 
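	/*
	 * On DragonFly the images are resolved as radeonkmsfw_<chip>_<block>
	 * firmware modules; the MODULE_FIRMWARE() tags above preserve the
	 * radeon/<CHIP>_<block>.bin names used by upstream Linux.
	 */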
1608 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
1609 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1610 	if (err)
1611 		goto out;
1612 	if (rdev->pfp_fw->datasize != pfp_req_size) {
1613 		printk(KERN_ERR
1614 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1615 		       rdev->pfp_fw->datasize, fw_name);
1616 		err = -EINVAL;
1617 		goto out;
1618 	}
1619 
1620 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
1621 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1622 	if (err)
1623 		goto out;
1624 	if (rdev->me_fw->datasize != me_req_size) {
1625 		printk(KERN_ERR
1626 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1627 		       rdev->me_fw->datasize, fw_name);
1628 		err = -EINVAL;
		goto out;
1629 	}
1630 
1631 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
1632 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1633 	if (err)
1634 		goto out;
1635 	if (rdev->ce_fw->datasize != ce_req_size) {
1636 		printk(KERN_ERR
1637 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1638 		       rdev->ce_fw->datasize, fw_name);
1639 		err = -EINVAL;
		goto out;
1640 	}
1641 
1642 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", rlc_chip_name);
1643 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1644 	if (err)
1645 		goto out;
1646 	if (rdev->rlc_fw->datasize != rlc_req_size) {
1647 		printk(KERN_ERR
1648 		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1649 		       rdev->rlc_fw->datasize, fw_name);
1650 		err = -EINVAL;
		goto out;
1651 	}
1652 
1653 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
1654 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1655 	if (err)
1656 		goto out;
1657 	if (rdev->mc_fw->datasize != mc_req_size) {
1658 		printk(KERN_ERR
1659 		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1660 		       rdev->mc_fw->datasize, fw_name);
1661 		err = -EINVAL;
		goto out;
1662 	}
1663 
1664 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
1665 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1666 	if (err) {
1667 		printk(KERN_ERR
1668 		       "smc: error loading firmware \"%s\"\n",
1669 		       fw_name);
1670 		release_firmware(rdev->smc_fw);
1671 		rdev->smc_fw = NULL;
		err = 0; /* assumed intent: a missing smc ucode is non-fatal */
1672 	} else if (rdev->smc_fw->datasize != smc_req_size) {
1673 		printk(KERN_ERR
1674 		       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1675 		       rdev->smc_fw->datasize, fw_name);
1676 		err = -EINVAL;
1677 	}
1678 
1679 out:
1680 	if (err) {
1681 		if (err != -EINVAL)
1682 			printk(KERN_ERR
1683 			       "si_cp: Failed to load firmware \"%s\"\n",
1684 			       fw_name);
1685 		release_firmware(rdev->pfp_fw);
1686 		rdev->pfp_fw = NULL;
1687 		release_firmware(rdev->me_fw);
1688 		rdev->me_fw = NULL;
1689 		release_firmware(rdev->ce_fw);
1690 		rdev->ce_fw = NULL;
1691 		release_firmware(rdev->rlc_fw);
1692 		rdev->rlc_fw = NULL;
1693 		release_firmware(rdev->mc_fw);
1694 		rdev->mc_fw = NULL;
1695 		release_firmware(rdev->smc_fw);
1696 		rdev->smc_fw = NULL;
1697 	}
1698 	return err;
1699 }
1700 
1701 /**
1702  * si_fini_microcode - drop the firmware image references
1703  *
1704  * @rdev: radeon_device pointer
1705  *
1706  * Drop the pfp, me, ce, rlc, mc and smc firmware image references.
1707  * Drop the pfp, me, ce, rlc, mc and smc firmware image references.
1708  */
1709 static void si_fini_microcode(struct radeon_device *rdev)
1710 {
1711 	release_firmware(rdev->pfp_fw);
1712 	rdev->pfp_fw = NULL;
1713 	release_firmware(rdev->me_fw);
1714 	rdev->me_fw = NULL;
1715 	release_firmware(rdev->rlc_fw);
1716 	rdev->rlc_fw = NULL;
1717 	release_firmware(rdev->mc_fw);
1718 	rdev->mc_fw = NULL;
1719 	release_firmware(rdev->smc_fw);
1720 	rdev->smc_fw = NULL;
1721 	release_firmware(rdev->ce_fw);
1722 	rdev->ce_fw = NULL;
1723 }
1724 
1725 /* watermark setup */
1726 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1727 				   struct radeon_crtc *radeon_crtc,
1728 				   struct drm_display_mode *mode,
1729 				   struct drm_display_mode *other_mode)
1730 {
1731 	u32 tmp;
1732 	/*
1733 	 * Line Buffer Setup
1734 	 * There are 3 line buffers, each one shared by 2 display controllers.
1735 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1736 	 * the display controllers.  The partitioning is done via one of four
1737 	 * preset allocations specified in bits 21:20; only two are used here:
1738 	 *  0 - half lb
1739 	 *  2 - whole lb, other crtc must be disabled
1740 	 */
1741 	/* this can get tricky if we have two large displays on a paired group
1742 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1743 	 * non-linked crtcs for maximum line buffer allocation.
1744 	 */
1745 	if (radeon_crtc->base.enabled && mode) {
1746 		if (other_mode)
1747 			tmp = 0; /* 1/2 */
1748 		else
1749 			tmp = 2; /* whole */
1750 	} else
1751 		tmp = 0;
1752 
1753 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1754 	       DC_LB_MEMORY_CONFIG(tmp));
1755 
1756 	if (radeon_crtc->base.enabled && mode) {
1757 		switch (tmp) {
1758 		case 0:
1759 		default:
1760 			return 4096 * 2;
1761 		case 2:
1762 			return 8192 * 2;
1763 		}
1764 	}
1765 
1766 	/* controller not enabled, so no lb used */
1767 	return 0;
1768 }
1769 
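/* Decode the NOOFCHAN field of MC_SHARED_CHMAP into a DRAM channel
 * count.  The encoding is not linear (e.g. 4 -> 3 channels), so a
 * lookup switch is used instead of arithmetic.
 */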
1770 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1771 {
1772 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1773 
1774 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1775 	case 0:
1776 	default:
1777 		return 1;
1778 	case 1:
1779 		return 2;
1780 	case 2:
1781 		return 4;
1782 	case 3:
1783 		return 8;
1784 	case 4:
1785 		return 3;
1786 	case 5:
1787 		return 6;
1788 	case 6:
1789 		return 10;
1790 	case 7:
1791 		return 12;
1792 	case 8:
1793 		return 16;
1794 	}
1795 }
1796 
1797 struct dce6_wm_params {
1798 	u32 dram_channels; /* number of dram channels */
1799 	u32 yclk;          /* bandwidth per dram data pin in kHz */
1800 	u32 sclk;          /* engine clock in kHz */
1801 	u32 disp_clk;      /* display clock in kHz */
1802 	u32 src_width;     /* viewport width */
1803 	u32 active_time;   /* active display time in ns */
1804 	u32 blank_time;    /* blank time in ns */
1805 	bool interlaced;    /* mode is interlaced */
1806 	fixed20_12 vsc;    /* vertical scale ratio */
1807 	u32 num_heads;     /* number of active crtcs */
1808 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
1809 	u32 lb_size;       /* line buffer allocated to pipe */
1810 	u32 vtaps;         /* vertical scaler taps */
1811 };
1812 
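/* The dce6_*_bandwidth() helpers below share one pattern: clocks are
 * stored in kHz, so divide by 1000 to get MHz, multiply by a bus width
 * in bytes and by a derating factor held in 20.12 fixed point (e.g.
 * 7/10 for 0.7), then truncate back to an integer.  Here the raw DRAM
 * bandwidth works out to yclk(MHz) * dram_channels * 4 bytes * 0.7.
 */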
1813 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1814 {
1815 	/* Calculate raw DRAM Bandwidth */
1816 	fixed20_12 dram_efficiency; /* 0.7 */
1817 	fixed20_12 yclk, dram_channels, bandwidth;
1818 	fixed20_12 a;
1819 
1820 	a.full = dfixed_const(1000);
1821 	yclk.full = dfixed_const(wm->yclk);
1822 	yclk.full = dfixed_div(yclk, a);
1823 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1824 	a.full = dfixed_const(10);
1825 	dram_efficiency.full = dfixed_const(7);
1826 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1827 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1828 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1829 
1830 	return dfixed_trunc(bandwidth);
1831 }
1832 
1833 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1834 {
1835 	/* Calculate DRAM Bandwidth and the part allocated to display. */
1836 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1837 	fixed20_12 yclk, dram_channels, bandwidth;
1838 	fixed20_12 a;
1839 
1840 	a.full = dfixed_const(1000);
1841 	yclk.full = dfixed_const(wm->yclk);
1842 	yclk.full = dfixed_div(yclk, a);
1843 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1844 	a.full = dfixed_const(10);
1845 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst-case value 0.3 */
1846 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1847 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1848 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1849 
1850 	return dfixed_trunc(bandwidth);
1851 }
1852 
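/* Display data return path: 32 bytes per sclk cycle, derated by an
 * assumed 0.8 efficiency, i.e. sclk(MHz) * 32 * 0.8.
 */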
1853 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1854 {
1855 	/* Calculate the display Data return Bandwidth */
1856 	fixed20_12 return_efficiency; /* 0.8 */
1857 	fixed20_12 sclk, bandwidth;
1858 	fixed20_12 a;
1859 
1860 	a.full = dfixed_const(1000);
1861 	sclk.full = dfixed_const(wm->sclk);
1862 	sclk.full = dfixed_div(sclk, a);
1863 	a.full = dfixed_const(10);
1864 	return_efficiency.full = dfixed_const(8);
1865 	return_efficiency.full = dfixed_div(return_efficiency, a);
1866 	a.full = dfixed_const(32);
1867 	bandwidth.full = dfixed_mul(a, sclk);
1868 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1869 
1870 	return dfixed_trunc(bandwidth);
1871 }
1872 
1873 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
1874 {
1875 	return 32;
1876 }
1877 
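/* The DMIF can issue half a request (16 bytes) per disp_clk cycle and
 * a full 32-byte request per sclk cycle; the request bandwidth is the
 * smaller of the two, derated by 0.8.
 */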
1878 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1879 {
1880 	/* Calculate the DMIF Request Bandwidth */
1881 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1882 	fixed20_12 disp_clk, sclk, bandwidth;
1883 	fixed20_12 a, b1, b2;
1884 	u32 min_bandwidth;
1885 
1886 	a.full = dfixed_const(1000);
1887 	disp_clk.full = dfixed_const(wm->disp_clk);
1888 	disp_clk.full = dfixed_div(disp_clk, a);
1889 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1890 	b1.full = dfixed_mul(a, disp_clk);
1891 
1892 	a.full = dfixed_const(1000);
1893 	sclk.full = dfixed_const(wm->sclk);
1894 	sclk.full = dfixed_div(sclk, a);
1895 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1896 	b2.full = dfixed_mul(a, sclk);
1897 
1898 	a.full = dfixed_const(10);
1899 	disp_clk_request_efficiency.full = dfixed_const(8);
1900 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1901 
1902 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1903 
1904 	a.full = dfixed_const(min_bandwidth);
1905 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1906 
1907 	return dfixed_trunc(bandwidth);
1908 }
1909 
1910 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1911 {
1912 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
1913 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1914 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1915 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1916 
1917 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1918 }
1919 
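/* Average bandwidth one head needs for scanout:
 * src_width * bytes_per_pixel * vsc / line_time(us).
 */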
1920 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1921 {
1922 	/* Calculate the display mode Average Bandwidth
1923 	 * DisplayMode should contain the source and destination dimensions,
1924 	 * timing, etc.
1925 	 */
1926 	fixed20_12 bpp;
1927 	fixed20_12 line_time;
1928 	fixed20_12 src_width;
1929 	fixed20_12 bandwidth;
1930 	fixed20_12 a;
1931 
1932 	a.full = dfixed_const(1000);
1933 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1934 	line_time.full = dfixed_div(line_time, a);
1935 	bpp.full = dfixed_const(wm->bytes_per_pixel);
1936 	src_width.full = dfixed_const(wm->src_width);
1937 	bandwidth.full = dfixed_mul(src_width, bpp);
1938 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1939 	bandwidth.full = dfixed_div(bandwidth, line_time);
1940 
1941 	return dfixed_trunc(bandwidth);
1942 }
1943 
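/* Worst-case latency the line buffer has to hide: a fixed MC latency,
 * the DC pipe latency, plus the time the other heads can occupy the
 * data return path.  If the line buffer fills slower than the display
 * drains it, the shortfall is added on top.
 */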
1944 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
1945 {
1946 	/* First calculate the latency in ns */
1947 	u32 mc_latency = 2000; /* 2000 ns. */
1948 	u32 available_bandwidth = dce6_available_bandwidth(wm);
1949 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
1950 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
1951 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
1952 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
1953 		(wm->num_heads * cursor_line_pair_return_time);
1954 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
1955 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
1956 	u32 tmp, dmif_size = 12288;
1957 	fixed20_12 a, b, c;
1958 
1959 	if (wm->num_heads == 0)
1960 		return 0;
1961 
1962 	a.full = dfixed_const(2);
1963 	b.full = dfixed_const(1);
1964 	if ((wm->vsc.full > a.full) ||
1965 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
1966 	    (wm->vtaps >= 5) ||
1967 	    ((wm->vsc.full >= a.full) && wm->interlaced))
1968 		max_src_lines_per_dst_line = 4;
1969 	else
1970 		max_src_lines_per_dst_line = 2;
1971 
1972 	a.full = dfixed_const(available_bandwidth);
1973 	b.full = dfixed_const(wm->num_heads);
1974 	a.full = dfixed_div(a, b);
1975 
1976 	b.full = dfixed_const(mc_latency + 512);
1977 	c.full = dfixed_const(wm->disp_clk);
1978 	b.full = dfixed_div(b, c);
1979 
1980 	c.full = dfixed_const(dmif_size);
1981 	b.full = dfixed_div(c, b);
1982 
1983 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
1984 
1985 	b.full = dfixed_const(1000);
1986 	c.full = dfixed_const(wm->disp_clk);
1987 	b.full = dfixed_div(c, b);
1988 	c.full = dfixed_const(wm->bytes_per_pixel);
1989 	b.full = dfixed_mul(b, c);
1990 
1991 	lb_fill_bw = min(tmp, dfixed_trunc(b));
1992 
1993 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
1994 	b.full = dfixed_const(1000);
1995 	c.full = dfixed_const(lb_fill_bw);
1996 	b.full = dfixed_div(c, b);
1997 	a.full = dfixed_div(a, b);
1998 	line_fill_time = dfixed_trunc(a);
1999 
2000 	if (line_fill_time < wm->active_time)
2001 		return latency;
2002 	else
2003 		return latency + (line_fill_time - wm->active_time);
2005 }
2006 
2007 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2008 {
2009 	if (dce6_average_bandwidth(wm) <=
2010 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2011 		return true;
2012 	else
2013 		return false;
2014 }
2015 
2016 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2017 {
2018 	if (dce6_average_bandwidth(wm) <=
2019 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2020 		return true;
2021 	else
2022 		return false;
2023 }
2024 
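/* A head can hide latency when the lines buffered ahead of the CRTC
 * (plus the blank period) cover the watermark latency above.
 */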
2025 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2026 {
2027 	u32 lb_partitions = wm->lb_size / wm->src_width;
2028 	u32 line_time = wm->active_time + wm->blank_time;
2029 	u32 latency_tolerant_lines;
2030 	u32 latency_hiding;
2031 	fixed20_12 a;
2032 
2033 	a.full = dfixed_const(1);
2034 	if (wm->vsc.full > a.full)
2035 		latency_tolerant_lines = 1;
2036 	else {
2037 		if (lb_partitions <= (wm->vtaps + 1))
2038 			latency_tolerant_lines = 1;
2039 		else
2040 			latency_tolerant_lines = 2;
2041 	}
2042 
2043 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2044 
2045 	if (dce6_latency_watermark(wm) <= latency_hiding)
2046 		return true;
2047 	else
2048 		return false;
2049 }
2050 
2051 static void dce6_program_watermarks(struct radeon_device *rdev,
2052 					 struct radeon_crtc *radeon_crtc,
2053 					 u32 lb_size, u32 num_heads)
2054 {
2055 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2056 	struct dce6_wm_params wm_low, wm_high;
2057 	u32 dram_channels;
2058 	u32 pixel_period;
2059 	u32 line_time = 0;
2060 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2061 	u32 priority_a_mark = 0, priority_b_mark = 0;
2062 	u32 priority_a_cnt = PRIORITY_OFF;
2063 	u32 priority_b_cnt = PRIORITY_OFF;
2064 	u32 tmp, arb_control3;
2065 	fixed20_12 a, b, c;
2066 
2067 	if (radeon_crtc->base.enabled && num_heads && mode) {
2068 		pixel_period = 1000000 / (u32)mode->clock;
2069 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2070 		priority_a_cnt = 0;
2071 		priority_b_cnt = 0;
2072 
2073 		if (rdev->family == CHIP_ARUBA)
2074 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2075 		else
2076 			dram_channels = si_get_number_of_dram_channels(rdev);
2077 
2078 		/* watermark for high clocks */
2079 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2080 			wm_high.yclk =
2081 				radeon_dpm_get_mclk(rdev, false) * 10;
2082 			wm_high.sclk =
2083 				radeon_dpm_get_sclk(rdev, false) * 10;
2084 		} else {
2085 			wm_high.yclk = rdev->pm.current_mclk * 10;
2086 			wm_high.sclk = rdev->pm.current_sclk * 10;
2087 		}
2088 
2089 		wm_high.disp_clk = mode->clock;
2090 		wm_high.src_width = mode->crtc_hdisplay;
2091 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2092 		wm_high.blank_time = line_time - wm_high.active_time;
2093 		wm_high.interlaced = false;
2094 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2095 			wm_high.interlaced = true;
2096 		wm_high.vsc = radeon_crtc->vsc;
2097 		wm_high.vtaps = 1;
2098 		if (radeon_crtc->rmx_type != RMX_OFF)
2099 			wm_high.vtaps = 2;
2100 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2101 		wm_high.lb_size = lb_size;
2102 		wm_high.dram_channels = dram_channels;
2103 		wm_high.num_heads = num_heads;
2104 
2105 		/* watermark for low clocks */
2106 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2107 			wm_low.yclk =
2108 				radeon_dpm_get_mclk(rdev, true) * 10;
2109 			wm_low.sclk =
2110 				radeon_dpm_get_sclk(rdev, true) * 10;
2111 		} else {
2112 			wm_low.yclk = rdev->pm.current_mclk * 10;
2113 			wm_low.sclk = rdev->pm.current_sclk * 10;
2114 		}
2115 
2116 		wm_low.disp_clk = mode->clock;
2117 		wm_low.src_width = mode->crtc_hdisplay;
2118 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2119 		wm_low.blank_time = line_time - wm_low.active_time;
2120 		wm_low.interlaced = false;
2121 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2122 			wm_low.interlaced = true;
2123 		wm_low.vsc = radeon_crtc->vsc;
2124 		wm_low.vtaps = 1;
2125 		if (radeon_crtc->rmx_type != RMX_OFF)
2126 			wm_low.vtaps = 2;
2127 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2128 		wm_low.lb_size = lb_size;
2129 		wm_low.dram_channels = dram_channels;
2130 		wm_low.num_heads = num_heads;
2131 
2132 		/* set for high clocks */
2133 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2134 		/* set for low clocks */
2135 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2136 
2137 		/* possibly force display priority to high */
2138 		/* should really do this at mode validation time... */
2139 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2140 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2141 		    !dce6_check_latency_hiding(&wm_high) ||
2142 		    (rdev->disp_priority == 2)) {
2143 			DRM_DEBUG_KMS("force priority to high\n");
2144 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2145 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2146 		}
2147 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2148 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2149 		    !dce6_check_latency_hiding(&wm_low) ||
2150 		    (rdev->disp_priority == 2)) {
2151 			DRM_DEBUG_KMS("force priority to high\n");
2152 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2153 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2154 		}
2155 
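		/* priority mark: roughly the number of 16-pixel groups the
		 * head fetches during the watermark latency window,
		 *   watermark(ns) * pixel clock(MHz) * hsc / (1000 * 16),
		 * computed once per watermark set.
		 */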
2156 		a.full = dfixed_const(1000);
2157 		b.full = dfixed_const(mode->clock);
2158 		b.full = dfixed_div(b, a);
2159 		c.full = dfixed_const(latency_watermark_a);
2160 		c.full = dfixed_mul(c, b);
2161 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2162 		c.full = dfixed_div(c, a);
2163 		a.full = dfixed_const(16);
2164 		c.full = dfixed_div(c, a);
2165 		priority_a_mark = dfixed_trunc(c);
2166 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2167 
2168 		a.full = dfixed_const(1000);
2169 		b.full = dfixed_const(mode->clock);
2170 		b.full = dfixed_div(b, a);
2171 		c.full = dfixed_const(latency_watermark_b);
2172 		c.full = dfixed_mul(c, b);
2173 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2174 		c.full = dfixed_div(c, a);
2175 		a.full = dfixed_const(16);
2176 		c.full = dfixed_div(c, a);
2177 		priority_b_mark = dfixed_trunc(c);
2178 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2179 	}
2180 
2181 	/* select wm A */
2182 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2183 	tmp = arb_control3;
2184 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2185 	tmp |= LATENCY_WATERMARK_MASK(1);
2186 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2187 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2188 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2189 		LATENCY_HIGH_WATERMARK(line_time)));
2190 	/* select wm B */
2191 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2192 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2193 	tmp |= LATENCY_WATERMARK_MASK(2);
2194 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2195 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2196 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2197 		LATENCY_HIGH_WATERMARK(line_time)));
2198 	/* restore original selection */
2199 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2200 
2201 	/* write the priority marks */
2202 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2203 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2204 
2205 	/* save values for DPM */
2206 	radeon_crtc->line_time = line_time;
2207 	radeon_crtc->wm_high = latency_watermark_a;
2208 	radeon_crtc->wm_low = latency_watermark_b;
2209 }
2210 
2211 void dce6_bandwidth_update(struct radeon_device *rdev)
2212 {
2213 	struct drm_display_mode *mode0 = NULL;
2214 	struct drm_display_mode *mode1 = NULL;
2215 	u32 num_heads = 0, lb_size;
2216 	int i;
2217 
2218 	radeon_update_display_priority(rdev);
2219 
2220 	for (i = 0; i < rdev->num_crtc; i++) {
2221 		if (rdev->mode_info.crtcs[i]->base.enabled)
2222 			num_heads++;
2223 	}
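	/* Line buffers are shared by crtc pairs, so walk the crtcs two at
	 * a time and tell each half of a pair about the other's mode.
	 */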
2224 	for (i = 0; i < rdev->num_crtc; i += 2) {
2225 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2226 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2227 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2228 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2229 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2230 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2231 	}
2232 }
2233 
2234 /*
2235  * Core functions
2236  */
2237 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2238 {
2239 	const u32 num_tile_mode_states = 32;
2240 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2241 
2242 	switch (rdev->config.si.mem_row_size_in_kb) {
2243 	case 1:
2244 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2245 		break;
2246 	case 2:
2247 	default:
2248 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2249 		break;
2250 	case 4:
2251 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2252 		break;
2253 	}
2254 
2255 	if ((rdev->family == CHIP_TAHITI) ||
2256 	    (rdev->family == CHIP_PITCAIRN)) {
2257 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2258 			switch (reg_offset) {
2259 			case 0:  /* non-AA compressed depth or any compressed stencil */
2260 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2261 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2262 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2263 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2264 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2265 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2267 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2268 				break;
2269 			case 1:  /* 2xAA/4xAA compressed depth only */
2270 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2271 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2272 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2273 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2274 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2275 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2276 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2277 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2278 				break;
2279 			case 2:  /* 8xAA compressed depth only */
2280 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2281 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2282 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2283 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2284 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2285 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2286 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2287 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2288 				break;
2289 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2290 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2292 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2293 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2294 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2295 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2296 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2297 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2298 				break;
2299 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2300 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2301 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2302 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2303 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2304 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2305 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2306 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2307 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2308 				break;
2309 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2310 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2311 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2312 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2313 						 TILE_SPLIT(split_equal_to_row_size) |
2314 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2315 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2316 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2317 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2318 				break;
2319 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2320 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2322 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2323 						 TILE_SPLIT(split_equal_to_row_size) |
2324 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2325 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2326 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2327 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2328 				break;
2329 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2330 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2332 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2333 						 TILE_SPLIT(split_equal_to_row_size) |
2334 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2335 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2337 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2338 				break;
2339 			case 8:  /* 1D and 1D Array Surfaces */
2340 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2341 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2342 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2343 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2344 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2345 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2346 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2347 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2348 				break;
2349 			case 9:  /* Displayable maps. */
2350 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2351 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2352 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2353 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2354 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2355 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2357 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2358 				break;
2359 			case 10:  /* Display 8bpp. */
2360 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2361 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2362 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2363 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2364 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2365 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2366 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2367 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2368 				break;
2369 			case 11:  /* Display 16bpp. */
2370 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2372 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2373 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2374 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2375 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2377 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2378 				break;
2379 			case 12:  /* Display 32bpp. */
2380 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2381 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2382 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2383 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2384 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2385 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2386 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2387 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2388 				break;
2389 			case 13:  /* Thin. */
2390 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2392 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2393 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2394 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2395 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2396 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2397 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2398 				break;
2399 			case 14:  /* Thin 8 bpp. */
2400 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2401 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2402 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2403 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2404 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2405 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2406 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2407 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2408 				break;
2409 			case 15:  /* Thin 16 bpp. */
2410 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2411 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2412 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2413 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2414 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2415 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2417 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2418 				break;
2419 			case 16:  /* Thin 32 bpp. */
2420 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2421 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2422 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2423 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2424 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2425 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2426 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2427 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2428 				break;
2429 			case 17:  /* Thin 64 bpp. */
2430 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2432 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2433 						 TILE_SPLIT(split_equal_to_row_size) |
2434 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2435 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2436 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2437 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2438 				break;
2439 			case 21:  /* 8 bpp PRT. */
2440 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2441 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2442 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2443 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2444 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2445 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2446 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2448 				break;
2449 			case 22:  /* 16 bpp PRT */
2450 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2452 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2453 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2454 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2455 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2456 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2457 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2458 				break;
2459 			case 23:  /* 32 bpp PRT */
2460 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2461 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2462 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2463 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2464 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2465 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2467 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2468 				break;
2469 			case 24:  /* 64 bpp PRT */
2470 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2471 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2472 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2473 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2474 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2475 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2477 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2478 				break;
2479 			case 25:  /* 128 bpp PRT */
2480 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2481 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2482 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2483 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2484 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2485 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2488 				break;
2489 			default:
2490 				gb_tile_moden = 0;
2491 				break;
2492 			}
2493 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2494 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2495 		}
2496 	} else if ((rdev->family == CHIP_VERDE) ||
2497 		   (rdev->family == CHIP_OLAND) ||
2498 		   (rdev->family == CHIP_HAINAN)) {
2499 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2500 			switch (reg_offset) {
2501 			case 0:  /* non-AA compressed depth or any compressed stencil */
2502 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2503 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2504 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2505 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2506 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2507 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2509 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2510 				break;
2511 			case 1:  /* 2xAA/4xAA compressed depth only */
2512 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2514 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2515 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2516 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2517 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2519 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2520 				break;
2521 			case 2:  /* 8xAA compressed depth only */
2522 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2523 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2524 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2525 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2526 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2527 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2529 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2530 				break;
2531 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2532 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2534 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2535 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2536 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2537 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2539 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2540 				break;
2541 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2542 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2543 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2544 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2545 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2546 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2547 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2549 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2550 				break;
2551 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2552 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2553 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2554 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2555 						 TILE_SPLIT(split_equal_to_row_size) |
2556 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2557 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2559 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2560 				break;
2561 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2562 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2564 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2565 						 TILE_SPLIT(split_equal_to_row_size) |
2566 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2567 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2568 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2569 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2570 				break;
2571 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2572 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2573 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2574 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2575 						 TILE_SPLIT(split_equal_to_row_size) |
2576 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2577 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2578 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2579 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2580 				break;
2581 			case 8:  /* 1D and 1D Array Surfaces */
2582 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2583 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2584 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2585 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2586 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2587 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2589 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2590 				break;
2591 			case 9:  /* Displayable maps. */
2592 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2593 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2594 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2595 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2596 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2597 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2598 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2599 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2600 				break;
2601 			case 10:  /* Display 8bpp. */
2602 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2603 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2604 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2605 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2606 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2607 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2609 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2610 				break;
2611 			case 11:  /* Display 16bpp. */
2612 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2613 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2614 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2615 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2616 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2617 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2618 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2619 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2620 				break;
2621 			case 12:  /* Display 32bpp. */
2622 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2623 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2624 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2625 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2626 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2627 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2630 				break;
2631 			case 13:  /* Thin. */
2632 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2633 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2634 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2635 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2636 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2637 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2639 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2640 				break;
2641 			case 14:  /* Thin 8 bpp. */
2642 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2643 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2644 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2645 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2646 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2647 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2649 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2650 				break;
2651 			case 15:  /* Thin 16 bpp. */
2652 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2653 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2654 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2655 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2656 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2657 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2658 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2659 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2660 				break;
2661 			case 16:  /* Thin 32 bpp. */
2662 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2663 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2664 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2665 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2666 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2667 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2669 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2670 				break;
2671 			case 17:  /* Thin 64 bpp. */
2672 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2674 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2675 						 TILE_SPLIT(split_equal_to_row_size) |
2676 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2677 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2678 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2679 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2680 				break;
2681 			case 21:  /* 8 bpp PRT. */
2682 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2683 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2684 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2685 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2686 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2687 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2688 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2689 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2690 				break;
2691 			case 22:  /* 16 bpp PRT */
2692 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2693 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2694 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2695 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2696 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2697 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2698 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2699 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2700 				break;
2701 			case 23:  /* 32 bpp PRT */
2702 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2703 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2704 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2705 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2706 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2707 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2708 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2709 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2710 				break;
2711 			case 24:  /* 64 bpp PRT */
2712 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2713 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2714 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2715 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2716 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2717 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2718 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2719 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2720 				break;
2721 			case 25:  /* 128 bpp PRT */
2722 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2723 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2724 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2725 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2726 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2727 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2729 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2730 				break;
2731 			default:
2732 				gb_tile_moden = 0;
2733 				break;
2734 			}
2735 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2736 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2737 		}
2738 	} else
2739 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2740 }
2741 
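/* Steer register accesses to one shader engine / shader array via
 * GRBM_GFX_INDEX; 0xffffffff for either index means broadcast to all.
 */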
2742 static void si_select_se_sh(struct radeon_device *rdev,
2743 			    u32 se_num, u32 sh_num)
2744 {
2745 	u32 data = INSTANCE_BROADCAST_WRITES;
2746 
2747 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2748 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2749 	else if (se_num == 0xffffffff)
2750 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2751 	else if (sh_num == 0xffffffff)
2752 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2753 	else
2754 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2755 	WREG32(GRBM_GFX_INDEX, data);
2756 }
2757 
2758 static u32 si_create_bitmask(u32 bit_width)
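/* Build a mask of the low bit_width bits, e.g. si_create_bitmask(4)
 * returns 0xf; equivalent to (1 << bit_width) - 1 for bit_width < 32.
 */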
2759 {
2760 	u32 i, mask = 0;
2761 
2762 	for (i = 0; i < bit_width; i++) {
2763 		mask <<= 1;
2764 		mask |= 1;
2765 	}
2766 	return mask;
2767 }
2768 
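/* Merge the fuse-disabled (CC_*) and driver-disabled (GC_USER_*) CU
 * masks for the currently selected SE/SH, then invert, giving a
 * bitmask of the CUs that are actually usable.
 */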
2769 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2770 {
2771 	u32 data, mask;
2772 
2773 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2774 	if (data & 1)
2775 		data &= INACTIVE_CUS_MASK;
2776 	else
2777 		data = 0;
2778 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2779 
2780 	data >>= INACTIVE_CUS_SHIFT;
2781 
2782 	mask = si_create_bitmask(cu_per_sh);
2783 
2784 	return ~data & mask;
2785 }
2786 
2787 static void si_setup_spi(struct radeon_device *rdev,
2788 			 u32 se_num, u32 sh_per_se,
2789 			 u32 cu_per_sh)
2790 {
2791 	int i, j, k;
2792 	u32 data, mask, active_cu;
2793 
2794 	for (i = 0; i < se_num; i++) {
2795 		for (j = 0; j < sh_per_se; j++) {
2796 			si_select_se_sh(rdev, i, j);
2797 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2798 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2799 
2800 			/* check each CU bit in turn */
2801 			for (k = 0; k < 16; k++) {
2802 				mask = 1 << k;
2803 				if (active_cu & mask) {
2804 					data &= ~mask;
2805 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2806 					break;
2807 				}
2808 			}
2809 		}
2810 	}
2811 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2812 }
2813 
2814 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2815 			      u32 max_rb_num, u32 se_num,
2816 			      u32 sh_per_se)
2817 {
2818 	u32 data, mask;
2819 
2820 	data = RREG32(CC_RB_BACKEND_DISABLE);
2821 	if (data & 1)
2822 		data &= BACKEND_DISABLE_MASK;
2823 	else
2824 		data = 0;
2825 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2826 
2827 	data >>= BACKEND_DISABLE_SHIFT;
2828 
2829 	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2830 
2831 	return data & mask;
2832 }
2833 
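/* Collect the per-SE/SH disabled-RB masks into one bitmap, derive the
 * enabled set, and program PA_SC_RASTER_CONFIG so each packer maps to
 * an enabled render backend (two RB map bits per SH).
 */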
2834 static void si_setup_rb(struct radeon_device *rdev,
2835 			u32 se_num, u32 sh_per_se,
2836 			u32 max_rb_num)
2837 {
2838 	int i, j;
2839 	u32 data, mask;
2840 	u32 disabled_rbs = 0;
2841 	u32 enabled_rbs = 0;
2842 
2843 	for (i = 0; i < se_num; i++) {
2844 		for (j = 0; j < sh_per_se; j++) {
2845 			si_select_se_sh(rdev, i, j);
2846 			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2847 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
2848 		}
2849 	}
2850 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2851 
2852 	mask = 1;
2853 	for (i = 0; i < max_rb_num; i++) {
2854 		if (!(disabled_rbs & mask))
2855 			enabled_rbs |= mask;
2856 		mask <<= 1;
2857 	}
2858 
2859 	for (i = 0; i < se_num; i++) {
2860 		si_select_se_sh(rdev, i, 0xffffffff);
2861 		data = 0;
2862 		for (j = 0; j < sh_per_se; j++) {
2863 			switch (enabled_rbs & 3) {
2864 			case 1:
2865 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2866 				break;
2867 			case 2:
2868 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2869 				break;
2870 			case 3:
2871 			default:
2872 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2873 				break;
2874 			}
2875 			enabled_rbs >>= 2;
2876 		}
2877 		WREG32(PA_SC_RASTER_CONFIG, data);
2878 	}
2879 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2880 }
2881 
2882 static void si_gpu_init(struct radeon_device *rdev)
2883 {
2884 	u32 gb_addr_config = 0;
2885 	u32 mc_shared_chmap, mc_arb_ramcfg;
2886 	u32 sx_debug_1;
2887 	u32 hdp_host_path_cntl;
2888 	u32 tmp;
2889 	int i, j;
2890 
2891 	switch (rdev->family) {
2892 	case CHIP_TAHITI:
2893 		rdev->config.si.max_shader_engines = 2;
2894 		rdev->config.si.max_tile_pipes = 12;
2895 		rdev->config.si.max_cu_per_sh = 8;
2896 		rdev->config.si.max_sh_per_se = 2;
2897 		rdev->config.si.max_backends_per_se = 4;
2898 		rdev->config.si.max_texture_channel_caches = 12;
2899 		rdev->config.si.max_gprs = 256;
2900 		rdev->config.si.max_gs_threads = 32;
2901 		rdev->config.si.max_hw_contexts = 8;
2902 
2903 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2904 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2905 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2906 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2907 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2908 		break;
2909 	case CHIP_PITCAIRN:
2910 		rdev->config.si.max_shader_engines = 2;
2911 		rdev->config.si.max_tile_pipes = 8;
2912 		rdev->config.si.max_cu_per_sh = 5;
2913 		rdev->config.si.max_sh_per_se = 2;
2914 		rdev->config.si.max_backends_per_se = 4;
2915 		rdev->config.si.max_texture_channel_caches = 8;
2916 		rdev->config.si.max_gprs = 256;
2917 		rdev->config.si.max_gs_threads = 32;
2918 		rdev->config.si.max_hw_contexts = 8;
2919 
2920 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2921 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
2922 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2923 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2924 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
2925 		break;
2926 	case CHIP_VERDE:
2927 	default:
2928 		rdev->config.si.max_shader_engines = 1;
2929 		rdev->config.si.max_tile_pipes = 4;
2930 		rdev->config.si.max_cu_per_sh = 5;
2931 		rdev->config.si.max_sh_per_se = 2;
2932 		rdev->config.si.max_backends_per_se = 4;
2933 		rdev->config.si.max_texture_channel_caches = 4;
2934 		rdev->config.si.max_gprs = 256;
2935 		rdev->config.si.max_gs_threads = 32;
2936 		rdev->config.si.max_hw_contexts = 8;
2937 
2938 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2939 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2940 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2941 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2942 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2943 		break;
2944 	case CHIP_OLAND:
2945 		rdev->config.si.max_shader_engines = 1;
2946 		rdev->config.si.max_tile_pipes = 4;
2947 		rdev->config.si.max_cu_per_sh = 6;
2948 		rdev->config.si.max_sh_per_se = 1;
2949 		rdev->config.si.max_backends_per_se = 2;
2950 		rdev->config.si.max_texture_channel_caches = 4;
2951 		rdev->config.si.max_gprs = 256;
2952 		rdev->config.si.max_gs_threads = 16;
2953 		rdev->config.si.max_hw_contexts = 8;
2954 
2955 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2956 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2957 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2958 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2959 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
2960 		break;
2961 	case CHIP_HAINAN:
2962 		rdev->config.si.max_shader_engines = 1;
2963 		rdev->config.si.max_tile_pipes = 4;
2964 		rdev->config.si.max_cu_per_sh = 5;
2965 		rdev->config.si.max_sh_per_se = 1;
2966 		rdev->config.si.max_backends_per_se = 1;
2967 		rdev->config.si.max_texture_channel_caches = 2;
2968 		rdev->config.si.max_gprs = 256;
2969 		rdev->config.si.max_gs_threads = 16;
2970 		rdev->config.si.max_hw_contexts = 8;
2971 
2972 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
2973 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
2974 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
2975 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
2976 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
2977 		break;
2978 	}
2979 
2980 	/* Initialize HDP */
2981 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2982 		WREG32((0x2c14 + j), 0x00000000);
2983 		WREG32((0x2c18 + j), 0x00000000);
2984 		WREG32((0x2c1c + j), 0x00000000);
2985 		WREG32((0x2c20 + j), 0x00000000);
2986 		WREG32((0x2c24 + j), 0x00000000);
2987 	}
2988 
2989 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2990 
2991 	evergreen_fix_pci_max_read_req_size(rdev);
2992 
2993 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2994 
2995 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2996 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2997 
2998 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
2999 	rdev->config.si.mem_max_burst_length_bytes = 256;
3000 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3001 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3002 	if (rdev->config.si.mem_row_size_in_kb > 4)
3003 		rdev->config.si.mem_row_size_in_kb = 4;
3004 	/* XXX use MC settings? */
3005 	rdev->config.si.shader_engine_tile_size = 32;
3006 	rdev->config.si.num_gpus = 1;
3007 	rdev->config.si.multi_gpu_tile_size = 64;
3008 
3009 	/* fix up row size */
3010 	gb_addr_config &= ~ROW_SIZE_MASK;
3011 	switch (rdev->config.si.mem_row_size_in_kb) {
3012 	case 1:
3013 	default:
3014 		gb_addr_config |= ROW_SIZE(0);
3015 		break;
3016 	case 2:
3017 		gb_addr_config |= ROW_SIZE(1);
3018 		break;
3019 	case 4:
3020 		gb_addr_config |= ROW_SIZE(2);
3021 		break;
3022 	}
3023 
3024 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3025 	 * not have bank info, so create a custom tiling dword.
3026 	 * bits 3:0   num_pipes
3027 	 * bits 7:4   num_banks
3028 	 * bits 11:8  group_size
3029 	 * bits 15:12 row_size
3030 	 */
3031 	rdev->config.si.tile_config = 0;
3032 	switch (rdev->config.si.num_tile_pipes) {
3033 	case 1:
3034 		rdev->config.si.tile_config |= (0 << 0);
3035 		break;
3036 	case 2:
3037 		rdev->config.si.tile_config |= (1 << 0);
3038 		break;
3039 	case 4:
3040 		rdev->config.si.tile_config |= (2 << 0);
3041 		break;
3042 	case 8:
3043 	default:
3044 		/* XXX what about 12? */
3045 		rdev->config.si.tile_config |= (3 << 0);
3046 		break;
3047 	}
3048 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3049 	case 0: /* four banks */
3050 		rdev->config.si.tile_config |= 0 << 4;
3051 		break;
3052 	case 1: /* eight banks */
3053 		rdev->config.si.tile_config |= 1 << 4;
3054 		break;
3055 	case 2: /* sixteen banks */
3056 	default:
3057 		rdev->config.si.tile_config |= 2 << 4;
3058 		break;
3059 	}
3060 	rdev->config.si.tile_config |=
3061 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3062 	rdev->config.si.tile_config |=
3063 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3064 
3065 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3066 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3067 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3068 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3069 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3070 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3071 	if (rdev->has_uvd) {
3072 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3073 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3074 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3075 	}
3076 
3077 	si_tiling_mode_table_init(rdev);
3078 
3079 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3080 		    rdev->config.si.max_sh_per_se,
3081 		    rdev->config.si.max_backends_per_se);
3082 
3083 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3084 		     rdev->config.si.max_sh_per_se,
3085 		     rdev->config.si.max_cu_per_sh);
3086 
3088 	/* set HW defaults for 3D engine */
3089 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3090 				     ROQ_IB2_START(0x2b)));
3091 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3092 
3093 	sx_debug_1 = RREG32(SX_DEBUG_1);
3094 	WREG32(SX_DEBUG_1, sx_debug_1);
3095 
3096 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3097 
3098 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3099 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3100 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3101 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3102 
3103 	WREG32(VGT_NUM_INSTANCES, 1);
3104 
3105 	WREG32(CP_PERFMON_CNTL, 0);
3106 
3107 	WREG32(SQ_CONFIG, 0);
3108 
3109 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3110 					  FORCE_EOV_MAX_REZ_CNT(255)));
3111 
3112 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3113 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3114 
3115 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3116 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3117 
3118 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3119 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3120 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3121 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3122 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3123 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3124 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3125 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3126 
3127 	tmp = RREG32(HDP_MISC_CNTL);
3128 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3129 	WREG32(HDP_MISC_CNTL, tmp);
3130 
3131 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3132 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3133 
3134 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3135 
3136 	udelay(50);
3137 }
3138 
3139 /*
3140  * GPU scratch registers helpers function.
3141  */
3142 static void si_scratch_init(struct radeon_device *rdev)
3143 {
3144 	int i;
3145 
3146 	rdev->scratch.num_reg = 7;
3147 	rdev->scratch.reg_base = SCRATCH_REG0;
3148 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3149 		rdev->scratch.free[i] = true;
3150 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3151 	}
3152 }
3153 
3154 void si_fence_ring_emit(struct radeon_device *rdev,
3155 			struct radeon_fence *fence)
3156 {
3157 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3158 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3159 
3160 	/* flush read cache over gart */
3161 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3162 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3163 	radeon_ring_write(ring, 0);
3164 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3165 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3166 			  PACKET3_TC_ACTION_ENA |
3167 			  PACKET3_SH_KCACHE_ACTION_ENA |
3168 			  PACKET3_SH_ICACHE_ACTION_ENA);
3169 	radeon_ring_write(ring, 0xFFFFFFFF);
3170 	radeon_ring_write(ring, 0);
3171 	radeon_ring_write(ring, 10); /* poll interval */
3172 	/* EVENT_WRITE_EOP - flush caches, send int */
3173 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3174 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3175 	radeon_ring_write(ring, addr & 0xffffffff);
3176 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3177 	radeon_ring_write(ring, fence->seq);
3178 	radeon_ring_write(ring, 0);
3179 }
3180 
3181 /*
3182  * Indirect buffers (IBs).
3183  */
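/**
 * si_ring_ib_execute - schedule an IB on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon_ib object holding the commands to schedule
 *
 * Emit an INDIRECT_BUFFER packet chaining the IB into the ring.
 * Const IBs are preceded by a SWITCH_BUFFER packet; regular IBs
 * record the expected next rptr first and flush the read caches
 * for the IB's VM id afterwards.
 */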
3184 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3185 {
3186 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3187 	u32 header;
3188 
3189 	if (ib->is_const_ib) {
3190 		/* set switch buffer packet before const IB */
3191 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3192 		radeon_ring_write(ring, 0);
3193 
3194 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3195 	} else {
3196 		u32 next_rptr;
3197 		if (ring->rptr_save_reg) {
3198 			next_rptr = ring->wptr + 3 + 4 + 8;
3199 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3200 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3201 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3202 			radeon_ring_write(ring, next_rptr);
3203 		} else if (rdev->wb.enabled) {
3204 			next_rptr = ring->wptr + 5 + 4 + 8;
3205 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3206 			radeon_ring_write(ring, (1 << 8));
3207 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3208 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3209 			radeon_ring_write(ring, next_rptr);
3210 		}
3211 
3212 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3213 	}
3214 
3215 	radeon_ring_write(ring, header);
3216 	radeon_ring_write(ring,
3217 #ifdef __BIG_ENDIAN
3218 			  (2 << 0) |
3219 #endif
3220 			  (ib->gpu_addr & 0xFFFFFFFC));
3221 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3222 	radeon_ring_write(ring, ib->length_dw |
3223 			  (ib->vm ? (ib->vm->id << 24) : 0));
3224 
3225 	if (!ib->is_const_ib) {
3226 		/* flush read cache over gart for this vmid */
3227 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3228 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3229 		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
3230 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3231 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3232 				  PACKET3_TC_ACTION_ENA |
3233 				  PACKET3_SH_KCACHE_ACTION_ENA |
3234 				  PACKET3_SH_ICACHE_ACTION_ENA);
3235 		radeon_ring_write(ring, 0xFFFFFFFF);
3236 		radeon_ring_write(ring, 0);
3237 		radeon_ring_write(ring, 10); /* poll interval */
3238 	}
3239 }
3240 
3241 /*
3242  * CP.
3243  */
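/**
 * si_cp_enable - enable/disable the CP micro engines
 *
 * @rdev: radeon_device pointer
 * @enable: true to run the CP, false to halt it
 *
 * On disable, halt the ME/PFP/CE engines and mark the gfx ring
 * and both compute rings as not ready.
 */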
3244 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3245 {
3246 	if (enable)
3247 		WREG32(CP_ME_CNTL, 0);
3248 	else {
3249 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3250 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3251 		WREG32(SCRATCH_UMSK, 0);
3252 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3253 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3254 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3255 	}
3256 	udelay(50);
3257 }
3258 
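/**
 * si_cp_load_microcode - load the CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Halt the CP and write the PFP, CE and ME firmware images into
 * the CP's internal memories.
 * Returns 0 on success, -EINVAL if the firmware is not loaded.
 */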
3259 static int si_cp_load_microcode(struct radeon_device *rdev)
3260 {
3261 	const __be32 *fw_data;
3262 	int i;
3263 
3264 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3265 		return -EINVAL;
3266 
3267 	si_cp_enable(rdev, false);
3268 
3269 	/* PFP */
3270 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3271 	WREG32(CP_PFP_UCODE_ADDR, 0);
3272 	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3273 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3274 	WREG32(CP_PFP_UCODE_ADDR, 0);
3275 
3276 	/* CE */
3277 	fw_data = (const __be32 *)rdev->ce_fw->data;
3278 	WREG32(CP_CE_UCODE_ADDR, 0);
3279 	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3280 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3281 	WREG32(CP_CE_UCODE_ADDR, 0);
3282 
3283 	/* ME */
3284 	fw_data = (const __be32 *)rdev->me_fw->data;
3285 	WREG32(CP_ME_RAM_WADDR, 0);
3286 	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3287 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3288 	WREG32(CP_ME_RAM_WADDR, 0);
3289 
3290 	WREG32(CP_PFP_UCODE_ADDR, 0);
3291 	WREG32(CP_CE_UCODE_ADDR, 0);
3292 	WREG32(CP_ME_RAM_WADDR, 0);
3293 	WREG32(CP_ME_RAM_RADDR, 0);
3294 	return 0;
3295 }
3296 
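/**
 * si_cp_start - initialize the CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Issue ME_INITIALIZE, set up the CE partitions, emit the clear
 * state preamble on the gfx ring and clear the compute context
 * state on all three rings.
 * Returns 0 on success, error on failure.
 */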
3297 static int si_cp_start(struct radeon_device *rdev)
3298 {
3299 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3300 	int r, i;
3301 
3302 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3303 	if (r) {
3304 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3305 		return r;
3306 	}
3307 	/* init the CP */
3308 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3309 	radeon_ring_write(ring, 0x1);
3310 	radeon_ring_write(ring, 0x0);
3311 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3312 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3313 	radeon_ring_write(ring, 0);
3314 	radeon_ring_write(ring, 0);
3315 
3316 	/* init the CE partitions */
3317 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3318 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3319 	radeon_ring_write(ring, 0xc000);
3320 	radeon_ring_write(ring, 0xe000);
3321 	radeon_ring_unlock_commit(rdev, ring);
3322 
3323 	si_cp_enable(rdev, true);
3324 
3325 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3326 	if (r) {
3327 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3328 		return r;
3329 	}
3330 
3331 	/* setup clear context state */
3332 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3333 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3334 
3335 	for (i = 0; i < si_default_size; i++)
3336 		radeon_ring_write(ring, si_default_state[i]);
3337 
3338 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3339 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3340 
3341 	/* set clear context state */
3342 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3343 	radeon_ring_write(ring, 0);
3344 
3345 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3346 	radeon_ring_write(ring, 0x00000316);
3347 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3348 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3349 
3350 	radeon_ring_unlock_commit(rdev, ring);
3351 
3352 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3353 		ring = &rdev->ring[i];
3354 		r = radeon_ring_lock(rdev, ring, 2);
		if (r) {
			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
			return r;
		}
3355 
3356 		/* clear the compute context state */
3357 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3358 		radeon_ring_write(ring, 0);
3359 
3360 		radeon_ring_unlock_commit(rdev, ring);
3361 	}
3362 
3363 	return 0;
3364 }
3365 
3366 static void si_cp_fini(struct radeon_device *rdev)
3367 {
3368 	struct radeon_ring *ring;
3369 	si_cp_enable(rdev, false);
3370 
3371 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3372 	radeon_ring_fini(rdev, ring);
3373 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3374 
3375 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3376 	radeon_ring_fini(rdev, ring);
3377 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3378 
3379 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3380 	radeon_ring_fini(rdev, ring);
3381 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3382 }
3383 
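/**
 * si_cp_resume - set up and start the CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Soft reset the CP, program the ring buffer registers for the
 * gfx ring and the two compute rings, then start and test all
 * three rings.
 * Returns 0 on success, error on failure.
 */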
3384 static int si_cp_resume(struct radeon_device *rdev)
3385 {
3386 	struct radeon_ring *ring;
3387 	u32 tmp;
3388 	u32 rb_bufsz;
3389 	int r;
3390 
3391 	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
3392 	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
3393 				 SOFT_RESET_PA |
3394 				 SOFT_RESET_VGT |
3395 				 SOFT_RESET_SPI |
3396 				 SOFT_RESET_SX));
3397 	RREG32(GRBM_SOFT_RESET);
3398 	mdelay(15);
3399 	WREG32(GRBM_SOFT_RESET, 0);
3400 	RREG32(GRBM_SOFT_RESET);
3401 
3402 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3403 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3404 
3405 	/* Set the write pointer delay */
3406 	WREG32(CP_RB_WPTR_DELAY, 0);
3407 
3408 	WREG32(CP_DEBUG, 0);
3409 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3410 
3411 	/* ring 0 - compute and gfx */
3412 	/* Set ring buffer size */
3413 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3414 	rb_bufsz = drm_order(ring->ring_size / 8);
3415 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3416 #ifdef __BIG_ENDIAN
3417 	tmp |= BUF_SWAP_32BIT;
3418 #endif
3419 	WREG32(CP_RB0_CNTL, tmp);
3420 
3421 	/* Initialize the ring buffer's read and write pointers */
3422 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3423 	ring->wptr = 0;
3424 	WREG32(CP_RB0_WPTR, ring->wptr);
3425 
3426 	/* set the wb address whether it's enabled or not */
3427 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3428 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3429 
3430 	if (rdev->wb.enabled)
3431 		WREG32(SCRATCH_UMSK, 0xff);
3432 	else {
3433 		tmp |= RB_NO_UPDATE;
3434 		WREG32(SCRATCH_UMSK, 0);
3435 	}
3436 
3437 	mdelay(1);
3438 	WREG32(CP_RB0_CNTL, tmp);
3439 
3440 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3441 
3442 	ring->rptr = RREG32(CP_RB0_RPTR);
3443 
3444 	/* ring1 - compute only */
3445 	/* Set ring buffer size */
3446 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3447 	rb_bufsz = drm_order(ring->ring_size / 8);
3448 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3449 #ifdef __BIG_ENDIAN
3450 	tmp |= BUF_SWAP_32BIT;
3451 #endif
3452 	WREG32(CP_RB1_CNTL, tmp);
3453 
3454 	/* Initialize the ring buffer's read and write pointers */
3455 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3456 	ring->wptr = 0;
3457 	WREG32(CP_RB1_WPTR, ring->wptr);
3458 
3459 	/* set the wb address whether it's enabled or not */
3460 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3461 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3462 
3463 	mdelay(1);
3464 	WREG32(CP_RB1_CNTL, tmp);
3465 
3466 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3467 
3468 	ring->rptr = RREG32(CP_RB1_RPTR);
3469 
3470 	/* ring2 - compute only */
3471 	/* Set ring buffer size */
3472 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3473 	rb_bufsz = drm_order(ring->ring_size / 8);
3474 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3475 #ifdef __BIG_ENDIAN
3476 	tmp |= BUF_SWAP_32BIT;
3477 #endif
3478 	WREG32(CP_RB2_CNTL, tmp);
3479 
3480 	/* Initialize the ring buffer's read and write pointers */
3481 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3482 	ring->wptr = 0;
3483 	WREG32(CP_RB2_WPTR, ring->wptr);
3484 
3485 	/* set the wb address whether it's enabled or not */
3486 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3487 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3488 
3489 	mdelay(1);
3490 	WREG32(CP_RB2_CNTL, tmp);
3491 
3492 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3493 
3494 	ring->rptr = RREG32(CP_RB2_RPTR);
3495 
3496 	/* start the rings */
3497 	r = si_cp_start(rdev);
	if (r)
		return r;
3498 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3499 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3500 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3501 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3502 	if (r) {
3503 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3504 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3505 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3506 		return r;
3507 	}
3508 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3509 	if (r) {
3510 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3511 	}
3512 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3513 	if (r) {
3514 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3515 	}
3516 
3517 	return 0;
3518 }
3519 
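/**
 * si_gpu_check_soft_reset - check which GPU blocks are hung
 *
 * @rdev: radeon_device pointer
 *
 * Read the various status registers and build a mask of the
 * blocks (GFX, CP, RLC, DMA, IH, etc.) that appear busy and
 * would need a soft reset.  An MC reset is skipped since the MC
 * is most likely just busy.
 */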
3520 static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3521 {
3522 	u32 reset_mask = 0;
3523 	u32 tmp;
3524 
3525 	/* GRBM_STATUS */
3526 	tmp = RREG32(GRBM_STATUS);
3527 	if (tmp & (PA_BUSY | SC_BUSY |
3528 		   BCI_BUSY | SX_BUSY |
3529 		   TA_BUSY | VGT_BUSY |
3530 		   DB_BUSY | CB_BUSY |
3531 		   GDS_BUSY | SPI_BUSY |
3532 		   IA_BUSY | IA_BUSY_NO_DMA))
3533 		reset_mask |= RADEON_RESET_GFX;
3534 
3535 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3536 		   CP_BUSY | CP_COHERENCY_BUSY))
3537 		reset_mask |= RADEON_RESET_CP;
3538 
3539 	if (tmp & GRBM_EE_BUSY)
3540 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3541 
3542 	/* GRBM_STATUS2 */
3543 	tmp = RREG32(GRBM_STATUS2);
3544 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3545 		reset_mask |= RADEON_RESET_RLC;
3546 
3547 	/* DMA_STATUS_REG 0 */
3548 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3549 	if (!(tmp & DMA_IDLE))
3550 		reset_mask |= RADEON_RESET_DMA;
3551 
3552 	/* DMA_STATUS_REG 1 */
3553 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3554 	if (!(tmp & DMA_IDLE))
3555 		reset_mask |= RADEON_RESET_DMA1;
3556 
3557 	/* SRBM_STATUS2 */
3558 	tmp = RREG32(SRBM_STATUS2);
3559 	if (tmp & DMA_BUSY)
3560 		reset_mask |= RADEON_RESET_DMA;
3561 
3562 	if (tmp & DMA1_BUSY)
3563 		reset_mask |= RADEON_RESET_DMA1;
3564 
3565 	/* SRBM_STATUS */
3566 	tmp = RREG32(SRBM_STATUS);
3567 
3568 	if (tmp & IH_BUSY)
3569 		reset_mask |= RADEON_RESET_IH;
3570 
3571 	if (tmp & SEM_BUSY)
3572 		reset_mask |= RADEON_RESET_SEM;
3573 
3574 	if (tmp & GRBM_RQ_PENDING)
3575 		reset_mask |= RADEON_RESET_GRBM;
3576 
3577 	if (tmp & VMC_BUSY)
3578 		reset_mask |= RADEON_RESET_VMC;
3579 
3580 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3581 		   MCC_BUSY | MCD_BUSY))
3582 		reset_mask |= RADEON_RESET_MC;
3583 
3584 	if (evergreen_is_display_hung(rdev))
3585 		reset_mask |= RADEON_RESET_DISPLAY;
3586 
3587 	/* VM_L2_STATUS */
3588 	tmp = RREG32(VM_L2_STATUS);
3589 	if (tmp & L2_BUSY)
3590 		reset_mask |= RADEON_RESET_VMC;
3591 
3592 	/* Skip MC reset as it's most likely not hung, just busy */
3593 	if (reset_mask & RADEON_RESET_MC) {
3594 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3595 		reset_mask &= ~RADEON_RESET_MC;
3596 	}
3597 
3598 	return reset_mask;
3599 }
3600 
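/**
 * si_gpu_soft_reset - soft reset the selected GPU blocks
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of blocks to reset (RADEON_RESET_*)
 *
 * Halt the CP and DMA engines, stop the MC, then pulse the
 * GRBM/SRBM soft reset bits for the requested blocks and
 * resume the MC.
 */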
3601 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3602 {
3603 	struct evergreen_mc_save save;
3604 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3605 	u32 tmp;
3606 
3607 	if (reset_mask == 0)
3608 		return;
3609 
3610 	dev_info(rdev->dev, "GPU soft reset: 0x%08X\n", reset_mask);
3611 
3612 	evergreen_print_gpu_status_regs(rdev);
3613 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3614 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3615 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3616 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3617 
3618 	/* Disable CP parsing/prefetching */
3619 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3620 
3621 	if (reset_mask & RADEON_RESET_DMA) {
3622 		/* dma0 */
3623 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3624 		tmp &= ~DMA_RB_ENABLE;
3625 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3626 	}
3627 	if (reset_mask & RADEON_RESET_DMA1) {
3628 		/* dma1 */
3629 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3630 		tmp &= ~DMA_RB_ENABLE;
3631 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3632 	}
3633 
3634 	udelay(50);
3635 
3636 	evergreen_mc_stop(rdev, &save);
3637 	if (evergreen_mc_wait_for_idle(rdev)) {
3638 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3639 	}
3640 
3641 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3642 		grbm_soft_reset = SOFT_RESET_CB |
3643 			SOFT_RESET_DB |
3644 			SOFT_RESET_GDS |
3645 			SOFT_RESET_PA |
3646 			SOFT_RESET_SC |
3647 			SOFT_RESET_BCI |
3648 			SOFT_RESET_SPI |
3649 			SOFT_RESET_SX |
3650 			SOFT_RESET_TC |
3651 			SOFT_RESET_TA |
3652 			SOFT_RESET_VGT |
3653 			SOFT_RESET_IA;
3654 	}
3655 
3656 	if (reset_mask & RADEON_RESET_CP) {
3657 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3658 
3659 		srbm_soft_reset |= SOFT_RESET_GRBM;
3660 	}
3661 
3662 	if (reset_mask & RADEON_RESET_DMA)
3663 		srbm_soft_reset |= SOFT_RESET_DMA;
3664 
3665 	if (reset_mask & RADEON_RESET_DMA1)
3666 		srbm_soft_reset |= SOFT_RESET_DMA1;
3667 
3668 	if (reset_mask & RADEON_RESET_DISPLAY)
3669 		srbm_soft_reset |= SOFT_RESET_DC;
3670 
3671 	if (reset_mask & RADEON_RESET_RLC)
3672 		grbm_soft_reset |= SOFT_RESET_RLC;
3673 
3674 	if (reset_mask & RADEON_RESET_SEM)
3675 		srbm_soft_reset |= SOFT_RESET_SEM;
3676 
3677 	if (reset_mask & RADEON_RESET_IH)
3678 		srbm_soft_reset |= SOFT_RESET_IH;
3679 
3680 	if (reset_mask & RADEON_RESET_GRBM)
3681 		srbm_soft_reset |= SOFT_RESET_GRBM;
3682 
3683 	if (reset_mask & RADEON_RESET_VMC)
3684 		srbm_soft_reset |= SOFT_RESET_VMC;
3685 
3686 	if (reset_mask & RADEON_RESET_MC)
3687 		srbm_soft_reset |= SOFT_RESET_MC;
3688 
3689 	if (grbm_soft_reset) {
3690 		tmp = RREG32(GRBM_SOFT_RESET);
3691 		tmp |= grbm_soft_reset;
3692 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3693 		WREG32(GRBM_SOFT_RESET, tmp);
3694 		tmp = RREG32(GRBM_SOFT_RESET);
3695 
3696 		udelay(50);
3697 
3698 		tmp &= ~grbm_soft_reset;
3699 		WREG32(GRBM_SOFT_RESET, tmp);
3700 		tmp = RREG32(GRBM_SOFT_RESET);
3701 	}
3702 
3703 	if (srbm_soft_reset) {
3704 		tmp = RREG32(SRBM_SOFT_RESET);
3705 		tmp |= srbm_soft_reset;
3706 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3707 		WREG32(SRBM_SOFT_RESET, tmp);
3708 		tmp = RREG32(SRBM_SOFT_RESET);
3709 
3710 		udelay(50);
3711 
3712 		tmp &= ~srbm_soft_reset;
3713 		WREG32(SRBM_SOFT_RESET, tmp);
3714 		tmp = RREG32(SRBM_SOFT_RESET);
3715 	}
3716 
3717 	/* Wait a little for things to settle down */
3718 	udelay(50);
3719 
3720 	evergreen_mc_resume(rdev, &save);
3721 	udelay(50);
3722 
3723 	evergreen_print_gpu_status_regs(rdev);
3724 }
3725 
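/**
 * si_asic_reset - soft reset the GPU
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are hung, perform a soft reset, and update
 * the BIOS scratch "engine hung" flag accordingly.
 * Always returns 0.
 */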
3726 int si_asic_reset(struct radeon_device *rdev)
3727 {
3728 	u32 reset_mask;
3729 
3730 	reset_mask = si_gpu_check_soft_reset(rdev);
3731 
3732 	if (reset_mask)
3733 		r600_set_bios_scratch_engine_hung(rdev, true);
3734 
3735 	si_gpu_soft_reset(rdev, reset_mask);
3736 
3737 	reset_mask = si_gpu_check_soft_reset(rdev);
3738 
3739 	if (!reset_mask)
3740 		r600_set_bios_scratch_engine_hung(rdev, false);
3741 
3742 	return 0;
3743 }
3744 
3745 /**
3746  * si_gfx_is_lockup - Check if the GFX engine is locked up
3747  *
3748  * @rdev: radeon_device pointer
3749  * @ring: radeon_ring structure holding ring information
3750  *
3751  * Check if the GFX engine is locked up.
3752  * Returns true if the engine appears to be locked up, false if not.
3753  */
3754 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3755 {
3756 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3757 
3758 	if (!(reset_mask & (RADEON_RESET_GFX |
3759 			    RADEON_RESET_COMPUTE |
3760 			    RADEON_RESET_CP))) {
3761 		radeon_ring_lockup_update(ring);
3762 		return false;
3763 	}
3764 	/* force CP activity */
3765 	radeon_ring_force_activity(rdev, ring);
3766 	return radeon_ring_test_lockup(rdev, ring);
3767 }
3768 
3769 /**
3770  * si_dma_is_lockup - Check if the DMA engine is locked up
3771  *
3772  * @rdev: radeon_device pointer
3773  * @ring: radeon_ring structure holding ring information
3774  *
3775  * Check if the async DMA engine is locked up.
3776  * Returns true if the engine appears to be locked up, false if not.
3777  */
3778 bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3779 {
3780 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3781 	u32 mask;
3782 
3783 	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
3784 		mask = RADEON_RESET_DMA;
3785 	else
3786 		mask = RADEON_RESET_DMA1;
3787 
3788 	if (!(reset_mask & mask)) {
3789 		radeon_ring_lockup_update(ring);
3790 		return false;
3791 	}
3792 	/* force ring activity */
3793 	radeon_ring_force_activity(rdev, ring);
3794 	return radeon_ring_test_lockup(rdev, ring);
3795 }
3796 
3797 /* MC */
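/**
 * si_mc_program - program the memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Program the VRAM aperture, system aperture and AGP registers
 * while the MC is stopped, and (on ASICs with display) disable
 * VGA rendering so it cannot overwrite our VRAM objects.
 */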
3798 static void si_mc_program(struct radeon_device *rdev)
3799 {
3800 	struct evergreen_mc_save save;
3801 	u32 tmp;
3802 	int i, j;
3803 
3804 	/* Initialize HDP */
3805 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3806 		WREG32((0x2c14 + j), 0x00000000);
3807 		WREG32((0x2c18 + j), 0x00000000);
3808 		WREG32((0x2c1c + j), 0x00000000);
3809 		WREG32((0x2c20 + j), 0x00000000);
3810 		WREG32((0x2c24 + j), 0x00000000);
3811 	}
3812 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
3813 
3814 	evergreen_mc_stop(rdev, &save);
3815 	if (radeon_mc_wait_for_idle(rdev)) {
3816 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3817 	}
3818 	if (!ASIC_IS_NODCE(rdev))
3819 		/* Lockout access through VGA aperture*/
3820 		/* Lock out access through the VGA aperture */
3821 	/* Update configuration */
3822 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
3823 	       rdev->mc.vram_start >> 12);
3824 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
3825 	       rdev->mc.vram_end >> 12);
3826 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
3827 	       rdev->vram_scratch.gpu_addr >> 12);
3828 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
3829 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
3830 	WREG32(MC_VM_FB_LOCATION, tmp);
3831 	/* XXX double check these! */
3832 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
3833 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
3834 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
3835 	WREG32(MC_VM_AGP_BASE, 0);
3836 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
3837 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
3838 	if (radeon_mc_wait_for_idle(rdev)) {
3839 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3840 	}
3841 	evergreen_mc_resume(rdev, &save);
3842 	if (!ASIC_IS_NODCE(rdev)) {
3843 		/* we need to own VRAM, so turn off the VGA renderer here
3844 		 * to stop it from overwriting our objects */
3845 		rv515_vga_render_disable(rdev);
3846 	}
3847 }
3848 
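/**
 * si_vram_gtt_location - place VRAM and GTT in the GPU address space
 *
 * @rdev: radeon_device pointer
 * @mc: memory controller structure to fill in
 *
 * Cap VRAM so at least 1024M of address space remains for the
 * GTT, then let the common helpers pick the VRAM and GTT
 * locations.
 */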
3849 void si_vram_gtt_location(struct radeon_device *rdev,
3850 			  struct radeon_mc *mc)
3851 {
3852 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
3853 		/* leave room for at least 1024M GTT */
3854 		dev_warn(rdev->dev, "limiting VRAM\n");
3855 		mc->real_vram_size = 0xFFC0000000ULL;
3856 		mc->mc_vram_size = 0xFFC0000000ULL;
3857 	}
3858 	radeon_vram_location(rdev, &rdev->mc, 0);
3859 	rdev->mc.gtt_base_align = 0;
3860 	radeon_gtt_location(rdev, mc);
3861 }
3862 
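/**
 * si_mc_init - initialize the memory controller parameters
 *
 * @rdev: radeon_device pointer
 *
 * Determine the VRAM width from the channel size and count,
 * read the VRAM size from CONFIG_MEMSIZE and set up the
 * VRAM/GTT locations.
 * Returns 0.
 */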
3863 static int si_mc_init(struct radeon_device *rdev)
3864 {
3865 	u32 tmp;
3866 	int chansize, numchan;
3867 
3868 	/* Get VRAM information */
3869 	rdev->mc.vram_is_ddr = true;
3870 	tmp = RREG32(MC_ARB_RAMCFG);
3871 	if (tmp & CHANSIZE_OVERRIDE) {
3872 		chansize = 16;
3873 	} else if (tmp & CHANSIZE_MASK) {
3874 		chansize = 64;
3875 	} else {
3876 		chansize = 32;
3877 	}
3878 	tmp = RREG32(MC_SHARED_CHMAP);
3879 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3880 	case 0:
3881 	default:
3882 		numchan = 1;
3883 		break;
3884 	case 1:
3885 		numchan = 2;
3886 		break;
3887 	case 2:
3888 		numchan = 4;
3889 		break;
3890 	case 3:
3891 		numchan = 8;
3892 		break;
3893 	case 4:
3894 		numchan = 3;
3895 		break;
3896 	case 5:
3897 		numchan = 6;
3898 		break;
3899 	case 6:
3900 		numchan = 10;
3901 		break;
3902 	case 7:
3903 		numchan = 12;
3904 		break;
3905 	case 8:
3906 		numchan = 16;
3907 		break;
3908 	}
3909 	rdev->mc.vram_width = numchan * chansize;
3910 	/* Could the aperture size report 0? */
3911 	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
3912 	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
3913 	/* size in MB on si */
3914 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3915 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3916 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
3917 	si_vram_gtt_location(rdev, &rdev->mc);
3918 	radeon_update_bandwidth_info(rdev);
3919 
3920 	return 0;
3921 }
3922 
3923 /*
3924  * GART
3925  */
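/**
 * si_pcie_gart_tlb_flush - flush the VM TLBs
 *
 * @rdev: radeon_device pointer
 *
 * Flush the HDP cache, then request an invalidate of the TLBs
 * for VM contexts 0-15.
 */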
3926 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
3927 {
3928 	/* flush hdp cache */
3929 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
3930 
3931 	/* bits 0-15 are VM contexts 0-15 */
3932 	WREG32(VM_INVALIDATE_REQUEST, 1);
3933 }
3934 
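/**
 * si_pcie_gart_enable - set up and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pin the GART table in VRAM, program the TLB and L2 cache
 * control registers, set up VM context 0 for the GTT and
 * contexts 1-15 for per-process VMs, then flush the TLBs.
 * Returns 0 on success, error on failure.
 */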
3935 static int si_pcie_gart_enable(struct radeon_device *rdev)
3936 {
3937 	int r, i;
3938 
3939 	if (rdev->gart.robj == NULL) {
3940 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
3941 		return -EINVAL;
3942 	}
3943 	r = radeon_gart_table_vram_pin(rdev);
3944 	if (r)
3945 		return r;
3946 	radeon_gart_restore(rdev);
3947 	/* Setup TLB control */
3948 	WREG32(MC_VM_MX_L1_TLB_CNTL,
3949 	       (0xA << 7) |
3950 	       ENABLE_L1_TLB |
3951 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
3952 	       ENABLE_ADVANCED_DRIVER_MODEL |
3953 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
3954 	/* Setup L2 cache */
3955 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
3956 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
3957 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
3958 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
3959 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
3960 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
3961 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
3962 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
3963 	/* setup context0 */
3964 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
3965 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
3966 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
3967 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
3968 			(u32)(rdev->dummy_page.addr >> 12));
3969 	WREG32(VM_CONTEXT0_CNTL2, 0);
3970 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
3971 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
3972 
3973 	WREG32(0x15D4, 0);
3974 	WREG32(0x15D8, 0);
3975 	WREG32(0x15DC, 0);
3976 
3977 	/* empty context1-15 */
3978 	/* set vm size, must be a multiple of 4 */
3979 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
3980 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
3981 	/* Assign the PT base to something valid for now; the PTs used by
3982 	 * the VMs are determined by the application, and are set up and
3983 	 * assigned on the fly in the VM part of radeon_gart.c
3984 	 */
3985 	for (i = 1; i < 16; i++) {
3986 		if (i < 8)
3987 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
3988 			       rdev->gart.table_addr >> 12);
3989 		else
3990 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
3991 			       rdev->gart.table_addr >> 12);
3992 	}
3993 
3994 	/* enable context1-15 */
3995 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
3996 	       (u32)(rdev->dummy_page.addr >> 12));
3997 	WREG32(VM_CONTEXT1_CNTL2, 4);
3998 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
3999 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4000 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4001 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4002 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4003 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4004 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4005 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4006 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4007 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4008 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4009 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4010 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4011 
4012 	si_pcie_gart_tlb_flush(rdev);
4013 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4014 		 (unsigned)(rdev->mc.gtt_size >> 20),
4015 		 (unsigned long long)rdev->gart.table_addr);
4016 	rdev->gart.ready = true;
4017 	return 0;
4018 }
4019 
4020 static void si_pcie_gart_disable(struct radeon_device *rdev)
4021 {
4022 	/* Disable all tables */
4023 	WREG32(VM_CONTEXT0_CNTL, 0);
4024 	WREG32(VM_CONTEXT1_CNTL, 0);
4025 	/* Setup TLB control */
4026 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4027 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4028 	/* Setup L2 cache */
4029 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4030 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4031 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4032 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4033 	WREG32(VM_L2_CNTL2, 0);
4034 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4035 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4036 	radeon_gart_table_vram_unpin(rdev);
4037 }
4038 
4039 static void si_pcie_gart_fini(struct radeon_device *rdev)
4040 {
4041 	si_pcie_gart_disable(rdev);
4042 	radeon_gart_table_vram_free(rdev);
4043 	radeon_gart_fini(rdev);
4044 }
4045 
4046 /* vm parser */
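/**
 * si_vm_reg_valid - check if a register may be written from a VM IB
 *
 * @reg: register offset to check
 *
 * Context registers (0x28000 and up) are always allowed; config
 * registers are checked against a whitelist.
 * Returns true if the register is safe, false otherwise.
 */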
4047 static bool si_vm_reg_valid(u32 reg)
4048 {
4049 	/* context regs are fine */
4050 	if (reg >= 0x28000)
4051 		return true;
4052 
4053 	/* check config regs */
4054 	switch (reg) {
4055 	case GRBM_GFX_INDEX:
4056 	case CP_STRMOUT_CNTL:
4057 	case VGT_VTX_VECT_EJECT_REG:
4058 	case VGT_CACHE_INVALIDATION:
4059 	case VGT_ESGS_RING_SIZE:
4060 	case VGT_GSVS_RING_SIZE:
4061 	case VGT_GS_VERTEX_REUSE:
4062 	case VGT_PRIMITIVE_TYPE:
4063 	case VGT_INDEX_TYPE:
4064 	case VGT_NUM_INDICES:
4065 	case VGT_NUM_INSTANCES:
4066 	case VGT_TF_RING_SIZE:
4067 	case VGT_HS_OFFCHIP_PARAM:
4068 	case VGT_TF_MEMORY_BASE:
4069 	case PA_CL_ENHANCE:
4070 	case PA_SU_LINE_STIPPLE_VALUE:
4071 	case PA_SC_LINE_STIPPLE_STATE:
4072 	case PA_SC_ENHANCE:
4073 	case SQC_CACHES:
4074 	case SPI_STATIC_THREAD_MGMT_1:
4075 	case SPI_STATIC_THREAD_MGMT_2:
4076 	case SPI_STATIC_THREAD_MGMT_3:
4077 	case SPI_PS_MAX_WAVE_ID:
4078 	case SPI_CONFIG_CNTL:
4079 	case SPI_CONFIG_CNTL_1:
4080 	case TA_CNTL_AUX:
4081 		return true;
4082 	default:
4083 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4084 		return false;
4085 	}
4086 }
4087 
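/**
 * si_vm_packet3_ce_check - validate a type 3 packet for the CE
 *
 * @rdev: radeon_device pointer
 * @ib: IB contents
 * @pkt: packet to check
 *
 * Only a small set of CE opcodes is allowed from a VM IB.
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */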
4088 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4089 				  u32 *ib, struct radeon_cs_packet *pkt)
4090 {
4091 	switch (pkt->opcode) {
4092 	case PACKET3_NOP:
4093 	case PACKET3_SET_BASE:
4094 	case PACKET3_SET_CE_DE_COUNTERS:
4095 	case PACKET3_LOAD_CONST_RAM:
4096 	case PACKET3_WRITE_CONST_RAM:
4097 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4098 	case PACKET3_DUMP_CONST_RAM:
4099 	case PACKET3_INCREMENT_CE_COUNTER:
4100 	case PACKET3_WAIT_ON_DE_COUNTER:
4101 	case PACKET3_CE_WRITE:
4102 		break;
4103 	default:
4104 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4105 		return -EINVAL;
4106 	}
4107 	return 0;
4108 }
4109 
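/**
 * si_vm_packet3_gfx_check - validate a type 3 packet for the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: IB contents
 * @pkt: packet to check
 *
 * Check the opcode against the gfx whitelist; packets that can
 * write registers (WRITE_DATA, COPY_DATA, CP_DMA, etc.) have
 * their destination registers validated with si_vm_reg_valid().
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */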
4110 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4111 				   u32 *ib, struct radeon_cs_packet *pkt)
4112 {
4113 	u32 idx = pkt->idx + 1;
4114 	u32 idx_value = ib[idx];
4115 	u32 start_reg, end_reg, reg, i;
4116 	u32 command, info;
4117 
4118 	switch (pkt->opcode) {
4119 	case PACKET3_NOP:
4120 	case PACKET3_SET_BASE:
4121 	case PACKET3_CLEAR_STATE:
4122 	case PACKET3_INDEX_BUFFER_SIZE:
4123 	case PACKET3_DISPATCH_DIRECT:
4124 	case PACKET3_DISPATCH_INDIRECT:
4125 	case PACKET3_ALLOC_GDS:
4126 	case PACKET3_WRITE_GDS_RAM:
4127 	case PACKET3_ATOMIC_GDS:
4128 	case PACKET3_ATOMIC:
4129 	case PACKET3_OCCLUSION_QUERY:
4130 	case PACKET3_SET_PREDICATION:
4131 	case PACKET3_COND_EXEC:
4132 	case PACKET3_PRED_EXEC:
4133 	case PACKET3_DRAW_INDIRECT:
4134 	case PACKET3_DRAW_INDEX_INDIRECT:
4135 	case PACKET3_INDEX_BASE:
4136 	case PACKET3_DRAW_INDEX_2:
4137 	case PACKET3_CONTEXT_CONTROL:
4138 	case PACKET3_INDEX_TYPE:
4139 	case PACKET3_DRAW_INDIRECT_MULTI:
4140 	case PACKET3_DRAW_INDEX_AUTO:
4141 	case PACKET3_DRAW_INDEX_IMMD:
4142 	case PACKET3_NUM_INSTANCES:
4143 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4144 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4145 	case PACKET3_DRAW_INDEX_OFFSET_2:
4146 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4147 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4148 	case PACKET3_MPEG_INDEX:
4149 	case PACKET3_WAIT_REG_MEM:
4150 	case PACKET3_MEM_WRITE:
4151 	case PACKET3_PFP_SYNC_ME:
4152 	case PACKET3_SURFACE_SYNC:
4153 	case PACKET3_EVENT_WRITE:
4154 	case PACKET3_EVENT_WRITE_EOP:
4155 	case PACKET3_EVENT_WRITE_EOS:
4156 	case PACKET3_SET_CONTEXT_REG:
4157 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4158 	case PACKET3_SET_SH_REG:
4159 	case PACKET3_SET_SH_REG_OFFSET:
4160 	case PACKET3_INCREMENT_DE_COUNTER:
4161 	case PACKET3_WAIT_ON_CE_COUNTER:
4162 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4163 	case PACKET3_ME_WRITE:
4164 		break;
4165 	case PACKET3_COPY_DATA:
4166 		if ((idx_value & 0xf00) == 0) {
4167 			reg = ib[idx + 3] * 4;
4168 			if (!si_vm_reg_valid(reg))
4169 				return -EINVAL;
4170 		}
4171 		break;
4172 	case PACKET3_WRITE_DATA:
4173 		if ((idx_value & 0xf00) == 0) {
4174 			start_reg = ib[idx + 1] * 4;
4175 			if (idx_value & 0x10000) {
4176 				if (!si_vm_reg_valid(start_reg))
4177 					return -EINVAL;
4178 			} else {
4179 				for (i = 0; i < (pkt->count - 2); i++) {
4180 					reg = start_reg + (4 * i);
4181 					if (!si_vm_reg_valid(reg))
4182 						return -EINVAL;
4183 				}
4184 			}
4185 		}
4186 		break;
4187 	case PACKET3_COND_WRITE:
4188 		if (idx_value & 0x100) {
4189 			reg = ib[idx + 5] * 4;
4190 			if (!si_vm_reg_valid(reg))
4191 				return -EINVAL;
4192 		}
4193 		break;
4194 	case PACKET3_COPY_DW:
4195 		if (idx_value & 0x2) {
4196 			reg = ib[idx + 3] * 4;
4197 			if (!si_vm_reg_valid(reg))
4198 				return -EINVAL;
4199 		}
4200 		break;
4201 	case PACKET3_SET_CONFIG_REG:
4202 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4203 		end_reg = 4 * pkt->count + start_reg - 4;
4204 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4205 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4206 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4207 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4208 			return -EINVAL;
4209 		}
4210 		for (i = 0; i < pkt->count; i++) {
4211 			reg = start_reg + (4 * i);
4212 			if (!si_vm_reg_valid(reg))
4213 				return -EINVAL;
4214 		}
4215 		break;
4216 	case PACKET3_CP_DMA:
4217 		command = ib[idx + 4];
4218 		info = ib[idx + 1];
4219 		if (command & PACKET3_CP_DMA_CMD_SAS) {
4220 			/* src address space is register */
4221 			if (((info & 0x60000000) >> 29) == 0) {
4222 				start_reg = idx_value << 2;
4223 				if (command & PACKET3_CP_DMA_CMD_SAIC) {
4224 					reg = start_reg;
4225 					if (!si_vm_reg_valid(reg)) {
4226 						DRM_ERROR("CP DMA Bad SRC register\n");
4227 						return -EINVAL;
4228 					}
4229 				} else {
4230 					for (i = 0; i < (command & 0x1fffff); i++) {
4231 						reg = start_reg + (4 * i);
4232 						if (!si_vm_reg_valid(reg)) {
4233 							DRM_ERROR("CP DMA Bad SRC register\n");
4234 							return -EINVAL;
4235 						}
4236 					}
4237 				}
4238 			}
4239 		}
4240 		if (command & PACKET3_CP_DMA_CMD_DAS) {
4241 			/* dst address space is register */
4242 			if (((info & 0x00300000) >> 20) == 0) {
4243 				start_reg = ib[idx + 2];
4244 				if (command & PACKET3_CP_DMA_CMD_DAIC) {
4245 					reg = start_reg;
4246 					if (!si_vm_reg_valid(reg)) {
4247 						DRM_ERROR("CP DMA Bad DST register\n");
4248 						return -EINVAL;
4249 					}
4250 				} else {
4251 					for (i = 0; i < (command & 0x1fffff); i++) {
4252 						reg = start_reg + (4 * i);
4253 						if (!si_vm_reg_valid(reg)) {
4254 							DRM_ERROR("CP DMA Bad DST register\n");
4255 							return -EINVAL;
4256 						}
4257 					}
4258 				}
4259 			}
4260 		}
4261 		break;
4262 	default:
4263 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4264 		return -EINVAL;
4265 	}
4266 	return 0;
4267 }
4268 
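/**
 * si_vm_packet3_compute_check - validate a type 3 packet for compute
 *
 * @rdev: radeon_device pointer
 * @ib: IB contents
 * @pkt: packet to check
 *
 * Same as the gfx check, but against the smaller compute opcode
 * whitelist.
 * Returns 0 if the packet is allowed, -EINVAL otherwise.
 */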
4269 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4270 				       u32 *ib, struct radeon_cs_packet *pkt)
4271 {
4272 	u32 idx = pkt->idx + 1;
4273 	u32 idx_value = ib[idx];
4274 	u32 start_reg, reg, i;
4275 
4276 	switch (pkt->opcode) {
4277 	case PACKET3_NOP:
4278 	case PACKET3_SET_BASE:
4279 	case PACKET3_CLEAR_STATE:
4280 	case PACKET3_DISPATCH_DIRECT:
4281 	case PACKET3_DISPATCH_INDIRECT:
4282 	case PACKET3_ALLOC_GDS:
4283 	case PACKET3_WRITE_GDS_RAM:
4284 	case PACKET3_ATOMIC_GDS:
4285 	case PACKET3_ATOMIC:
4286 	case PACKET3_OCCLUSION_QUERY:
4287 	case PACKET3_SET_PREDICATION:
4288 	case PACKET3_COND_EXEC:
4289 	case PACKET3_PRED_EXEC:
4290 	case PACKET3_CONTEXT_CONTROL:
4291 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4292 	case PACKET3_WAIT_REG_MEM:
4293 	case PACKET3_MEM_WRITE:
4294 	case PACKET3_PFP_SYNC_ME:
4295 	case PACKET3_SURFACE_SYNC:
4296 	case PACKET3_EVENT_WRITE:
4297 	case PACKET3_EVENT_WRITE_EOP:
4298 	case PACKET3_EVENT_WRITE_EOS:
4299 	case PACKET3_SET_CONTEXT_REG:
4300 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4301 	case PACKET3_SET_SH_REG:
4302 	case PACKET3_SET_SH_REG_OFFSET:
4303 	case PACKET3_INCREMENT_DE_COUNTER:
4304 	case PACKET3_WAIT_ON_CE_COUNTER:
4305 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4306 	case PACKET3_ME_WRITE:
4307 		break;
4308 	case PACKET3_COPY_DATA:
4309 		if ((idx_value & 0xf00) == 0) {
4310 			reg = ib[idx + 3] * 4;
4311 			if (!si_vm_reg_valid(reg))
4312 				return -EINVAL;
4313 		}
4314 		break;
4315 	case PACKET3_WRITE_DATA:
4316 		if ((idx_value & 0xf00) == 0) {
4317 			start_reg = ib[idx + 1] * 4;
4318 			if (idx_value & 0x10000) {
4319 				if (!si_vm_reg_valid(start_reg))
4320 					return -EINVAL;
4321 			} else {
4322 				for (i = 0; i < (pkt->count - 2); i++) {
4323 					reg = start_reg + (4 * i);
4324 					if (!si_vm_reg_valid(reg))
4325 						return -EINVAL;
4326 				}
4327 			}
4328 		}
4329 		break;
4330 	case PACKET3_COND_WRITE:
4331 		if (idx_value & 0x100) {
4332 			reg = ib[idx + 5] * 4;
4333 			if (!si_vm_reg_valid(reg))
4334 				return -EINVAL;
4335 		}
4336 		break;
4337 	case PACKET3_COPY_DW:
4338 		if (idx_value & 0x2) {
4339 			reg = ib[idx + 3] * 4;
4340 			if (!si_vm_reg_valid(reg))
4341 				return -EINVAL;
4342 		}
4343 		break;
4344 	default:
4345 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4346 		return -EINVAL;
4347 	}
4348 	return 0;
4349 }
4350 
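/**
 * si_ib_parse - validate a VM IB
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to validate
 *
 * Walk the packets in the IB and reject type 0 packets, unknown
 * packet types and any type 3 opcode not allowed on the target
 * ring (CE, gfx or compute).
 * Returns 0 if the IB is valid, -EINVAL otherwise.
 */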
4351 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4352 {
4353 	int ret = 0;
4354 	u32 idx = 0;
4355 	struct radeon_cs_packet pkt;
4356 
4357 	do {
4358 		pkt.idx = idx;
4359 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4360 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4361 		pkt.one_reg_wr = 0;
4362 		switch (pkt.type) {
4363 		case RADEON_PACKET_TYPE0:
4364 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4365 			ret = -EINVAL;
4366 			break;
4367 		case RADEON_PACKET_TYPE2:
4368 			idx += 1;
4369 			break;
4370 		case RADEON_PACKET_TYPE3:
4371 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4372 			if (ib->is_const_ib)
4373 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4374 			else {
4375 				switch (ib->ring) {
4376 				case RADEON_RING_TYPE_GFX_INDEX:
4377 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4378 					break;
4379 				case CAYMAN_RING_TYPE_CP1_INDEX:
4380 				case CAYMAN_RING_TYPE_CP2_INDEX:
4381 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4382 					break;
4383 				default:
4384 					dev_err(rdev->dev, "Non-PM4 ring %d!\n", ib->ring);
4385 					ret = -EINVAL;
4386 					break;
4387 				}
4388 			}
4389 			idx += pkt.count + 2;
4390 			break;
4391 		default:
4392 			dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
4393 			ret = -EINVAL;
4394 			break;
4395 		}
4396 		if (ret)
4397 			break;
4398 	} while (idx < ib->length_dw);
4399 
4400 	return ret;
4401 }
4402 
4403 /*
4404  * vm
4405  */
4406 int si_vm_init(struct radeon_device *rdev)
4407 {
4408 	/* number of VMs */
4409 	rdev->vm_manager.nvm = 16;
4410 	/* base offset of vram pages */
4411 	rdev->vm_manager.vram_base_offset = 0;
4412 
4413 	return 0;
4414 }
4415 
4416 void si_vm_fini(struct radeon_device *rdev)
4417 {
4418 }
4419 
4420 /**
4421  * si_vm_decode_fault - print human readable fault info
4422  *
4423  * @rdev: radeon_device pointer
4424  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4425  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4426  *
4427  * Print human readable fault information (SI).
4428  */
4429 static void si_vm_decode_fault(struct radeon_device *rdev,
4430 			       u32 status, u32 addr)
4431 {
4432 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4433 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4434 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4435 	const char *block;
4436 
4437 	if (rdev->family == CHIP_TAHITI) {
4438 		switch (mc_id) {
4439 		case 160:
4440 		case 144:
4441 		case 96:
4442 		case 80:
4443 		case 224:
4444 		case 208:
4445 		case 32:
4446 		case 16:
4447 			block = "CB";
4448 			break;
4449 		case 161:
4450 		case 145:
4451 		case 97:
4452 		case 81:
4453 		case 225:
4454 		case 209:
4455 		case 33:
4456 		case 17:
4457 			block = "CB_FMASK";
4458 			break;
4459 		case 162:
4460 		case 146:
4461 		case 98:
4462 		case 82:
4463 		case 226:
4464 		case 210:
4465 		case 34:
4466 		case 18:
4467 			block = "CB_CMASK";
4468 			break;
4469 		case 163:
4470 		case 147:
4471 		case 99:
4472 		case 83:
4473 		case 227:
4474 		case 211:
4475 		case 35:
4476 		case 19:
4477 			block = "CB_IMMED";
4478 			break;
4479 		case 164:
4480 		case 148:
4481 		case 100:
4482 		case 84:
4483 		case 228:
4484 		case 212:
4485 		case 36:
4486 		case 20:
4487 			block = "DB";
4488 			break;
4489 		case 165:
4490 		case 149:
4491 		case 101:
4492 		case 85:
4493 		case 229:
4494 		case 213:
4495 		case 37:
4496 		case 21:
4497 			block = "DB_HTILE";
4498 			break;
4499 		case 167:
4500 		case 151:
4501 		case 103:
4502 		case 87:
4503 		case 231:
4504 		case 215:
4505 		case 39:
4506 		case 23:
4507 			block = "DB_STEN";
4508 			break;
4509 		case 72:
4510 		case 68:
4511 		case 64:
4512 		case 8:
4513 		case 4:
4514 		case 0:
4515 		case 136:
4516 		case 132:
4517 		case 128:
4518 		case 200:
4519 		case 196:
4520 		case 192:
4521 			block = "TC";
4522 			break;
4523 		case 112:
4524 		case 48:
4525 			block = "CP";
4526 			break;
4527 		case 49:
4528 		case 177:
4529 		case 50:
4530 		case 178:
4531 			block = "SH";
4532 			break;
4533 		case 53:
4534 		case 190:
4535 			block = "VGT";
4536 			break;
4537 		case 117:
4538 			block = "IH";
4539 			break;
4540 		case 51:
4541 		case 115:
4542 			block = "RLC";
4543 			break;
4544 		case 119:
4545 		case 183:
4546 			block = "DMA0";
4547 			break;
4548 		case 61:
4549 			block = "DMA1";
4550 			break;
4551 		case 248:
4552 		case 120:
4553 			block = "HDP";
4554 			break;
4555 		default:
4556 			block = "unknown";
4557 			break;
4558 		}
4559 	} else {
4560 		switch (mc_id) {
4561 		case 32:
4562 		case 16:
4563 		case 96:
4564 		case 80:
4565 		case 160:
4566 		case 144:
4567 		case 224:
4568 		case 208:
4569 			block = "CB";
4570 			break;
4571 		case 33:
4572 		case 17:
4573 		case 97:
4574 		case 81:
4575 		case 161:
4576 		case 145:
4577 		case 225:
4578 		case 209:
4579 			block = "CB_FMASK";
4580 			break;
4581 		case 34:
4582 		case 18:
4583 		case 98:
4584 		case 82:
4585 		case 162:
4586 		case 146:
4587 		case 226:
4588 		case 210:
4589 			block = "CB_CMASK";
4590 			break;
4591 		case 35:
4592 		case 19:
4593 		case 99:
4594 		case 83:
4595 		case 163:
4596 		case 147:
4597 		case 227:
4598 		case 211:
4599 			block = "CB_IMMED";
4600 			break;
4601 		case 36:
4602 		case 20:
4603 		case 100:
4604 		case 84:
4605 		case 164:
4606 		case 148:
4607 		case 228:
4608 		case 212:
4609 			block = "DB";
4610 			break;
4611 		case 37:
4612 		case 21:
4613 		case 101:
4614 		case 85:
4615 		case 165:
4616 		case 149:
4617 		case 229:
4618 		case 213:
4619 			block = "DB_HTILE";
4620 			break;
4621 		case 39:
4622 		case 23:
4623 		case 103:
4624 		case 87:
4625 		case 167:
4626 		case 151:
4627 		case 231:
4628 		case 215:
4629 			block = "DB_STEN";
4630 			break;
4631 		case 72:
4632 		case 68:
4633 		case 8:
4634 		case 4:
4635 		case 136:
4636 		case 132:
4637 		case 200:
4638 		case 196:
4639 			block = "TC";
4640 			break;
4641 		case 112:
4642 		case 48:
4643 			block = "CP";
4644 			break;
4645 		case 49:
4646 		case 177:
4647 		case 50:
4648 		case 178:
4649 			block = "SH";
4650 			break;
4651 		case 53:
4652 			block = "VGT";
4653 			break;
4654 		case 117:
4655 			block = "IH";
4656 			break;
4657 		case 51:
4658 		case 115:
4659 			block = "RLC";
4660 			break;
4661 		case 119:
4662 		case 183:
4663 			block = "DMA0";
4664 			break;
4665 		case 61:
4666 			block = "DMA1";
4667 			break;
4668 		case 248:
4669 		case 120:
4670 			block = "HDP";
4671 			break;
4672 		default:
4673 			block = "unknown";
4674 			break;
4675 		}
4676 	}
4677 
4678 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4679 	       protections, vmid, addr,
4680 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4681 	       block, mc_id);
4682 }
4683 
4684 /**
4685  * si_vm_set_page - update the page tables using the CP
4686  *
4687  * @rdev: radeon_device pointer
4688  * @ib: indirect buffer to fill with commands
4689  * @pe: addr of the page entry
4690  * @addr: dst addr to write into pe
4691  * @count: number of page entries to update
4692  * @incr: increase next addr by incr bytes
4693  * @flags: access flags
4694  *
4695  * Update the page tables using the CP (SI).
4696  */
4697 void si_vm_set_page(struct radeon_device *rdev,
4698 		    struct radeon_ib *ib,
4699 		    uint64_t pe,
4700 		    uint64_t addr, unsigned count,
4701 		    uint32_t incr, uint32_t flags)
4702 {
4703 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4704 	uint64_t value;
4705 	unsigned ndw;
4706 
4707 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4708 		while (count) {
4709 			ndw = 2 + count * 2;
4710 			if (ndw > 0x3FFE)
4711 				ndw = 0x3FFE;
4712 
4713 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4714 			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4715 					WRITE_DATA_DST_SEL(1));
4716 			ib->ptr[ib->length_dw++] = pe;
4717 			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4718 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4719 				if (flags & RADEON_VM_PAGE_SYSTEM) {
4720 					value = radeon_vm_map_gart(rdev, addr);
4721 					value &= 0xFFFFFFFFFFFFF000ULL;
4722 				} else if (flags & RADEON_VM_PAGE_VALID) {
4723 					value = addr;
4724 				} else {
4725 					value = 0;
4726 				}
4727 				addr += incr;
4728 				value |= r600_flags;
4729 				ib->ptr[ib->length_dw++] = value;
4730 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4731 			}
4732 		}
4733 	} else {
4734 		/* DMA */
4735 		if (flags & RADEON_VM_PAGE_SYSTEM) {
4736 			while (count) {
4737 				ndw = count * 2;
4738 				if (ndw > 0xFFFFE)
4739 					ndw = 0xFFFFE;
4740 
4741 				/* for non-physically contiguous pages (system) */
4742 				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
4743 				ib->ptr[ib->length_dw++] = pe;
4744 				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4745 				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4746 					if (flags & RADEON_VM_PAGE_SYSTEM) {
4747 						value = radeon_vm_map_gart(rdev, addr);
4748 						value &= 0xFFFFFFFFFFFFF000ULL;
4749 					} else if (flags & RADEON_VM_PAGE_VALID) {
4750 						value = addr;
4751 					} else {
4752 						value = 0;
4753 					}
4754 					addr += incr;
4755 					value |= r600_flags;
4756 					ib->ptr[ib->length_dw++] = value;
4757 					ib->ptr[ib->length_dw++] = upper_32_bits(value);
4758 				}
4759 			}
4760 		} else {
4761 			while (count) {
4762 				ndw = count * 2;
4763 				if (ndw > 0xFFFFE)
4764 					ndw = 0xFFFFE;
4765 
4766 				if (flags & RADEON_VM_PAGE_VALID)
4767 					value = addr;
4768 				else
4769 					value = 0;
4770 				/* for physically contiguous pages (vram) */
4771 				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
4772 				ib->ptr[ib->length_dw++] = pe; /* dst addr */
4773 				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
4774 				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4775 				ib->ptr[ib->length_dw++] = 0;
4776 				ib->ptr[ib->length_dw++] = value; /* value */
4777 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4778 				ib->ptr[ib->length_dw++] = incr; /* increment size */
4779 				ib->ptr[ib->length_dw++] = 0;
4780 				pe += ndw * 4;
4781 				addr += (ndw / 2) * incr;
4782 				count -= ndw / 2;
4783 			}
4784 		}
4785 		while (ib->length_dw & 0x7)
4786 			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
4787 	}
4788 }
4789 
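/**
 * si_vm_flush - flush the TLB for a VM using the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit on
 * @vm: VM to flush
 *
 * Write the new page directory base for the VM, flush the HDP
 * cache, invalidate the VM's TLB entry and sync PFP to ME so
 * the PFP does not read stale data.
 */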
4790 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4791 {
4792 	struct radeon_ring *ring = &rdev->ring[ridx];
4793 
4794 	if (vm == NULL)
4795 		return;
4796 
4797 	/* write new base address */
4798 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4799 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4800 				 WRITE_DATA_DST_SEL(0)));
4801 
4802 	if (vm->id < 8) {
4803 		radeon_ring_write(ring,
4804 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4805 	} else {
4806 		radeon_ring_write(ring,
4807 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4808 	}
4809 	radeon_ring_write(ring, 0);
4810 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4811 
4812 	/* flush hdp cache */
4813 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4814 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4815 				 WRITE_DATA_DST_SEL(0)));
4816 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4817 	radeon_ring_write(ring, 0);
4818 	radeon_ring_write(ring, 0x1);
4819 
4820 	/* bits 0-15 are VM contexts 0-15 */
4821 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4822 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4823 				 WRITE_DATA_DST_SEL(0)));
4824 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4825 	radeon_ring_write(ring, 0);
4826 	radeon_ring_write(ring, 1 << vm->id);
4827 
4828 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
4829 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4830 	radeon_ring_write(ring, 0x0);
4831 }
4832 
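/**
 * si_dma_vm_flush - flush the TLB for a VM using the DMA ring
 *
 * @rdev: radeon_device pointer
 * @ridx: DMA ring index to emit on
 * @vm: VM to flush
 *
 * Same as si_vm_flush(), but emitted as SRBM_WRITE packets on
 * the async DMA ring.
 */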
4833 void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4834 {
4835 	struct radeon_ring *ring = &rdev->ring[ridx];
4836 
4837 	if (vm == NULL)
4838 		return;
4839 
4840 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4841 	if (vm->id < 8) {
4842 		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
4843 	} else {
4844 		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
4845 	}
4846 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4847 
4848 	/* flush hdp cache */
4849 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4850 	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
4851 	radeon_ring_write(ring, 1);
4852 
4853 	/* bits 0-7 are VM contexts 0-7 */
4854 	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
4855 	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
4856 	radeon_ring_write(ring, 1 << vm->id);
4857 }
4858 
4859 /*
4860  * Power and clock gating
4861  */
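/**
 * si_wait_for_rlc_serdes - wait for the RLC serdes to go idle
 *
 * @rdev: radeon_device pointer
 *
 * Poll the two RLC_SERDES_MASTER_BUSY registers until they read
 * zero or the usec timeout expires.
 */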
4862 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4863 {
4864 	int i;
4865 
4866 	for (i = 0; i < rdev->usec_timeout; i++) {
4867 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4868 			break;
4869 		udelay(1);
4870 	}
4871 
4872 	for (i = 0; i < rdev->usec_timeout; i++) {
4873 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4874 			break;
4875 		udelay(1);
4876 	}
4877 }
4878 
4879 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4880 					 bool enable)
4881 {
4882 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
4883 	u32 mask;
4884 	int i;
4885 
4886 	if (enable)
4887 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4888 	else
4889 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4890 	WREG32(CP_INT_CNTL_RING0, tmp);
4891 
4892 	if (!enable) {
4893 		/* read a gfx register */
4894 		tmp = RREG32(DB_DEPTH_INFO);
4895 
4896 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4897 		for (i = 0; i < rdev->usec_timeout; i++) {
4898 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4899 				break;
4900 			udelay(1);
4901 		}
4902 	}
4903 }
4904 
4905 static void si_set_uvd_dcm(struct radeon_device *rdev,
4906 			   bool sw_mode)
4907 {
4908 	u32 tmp, tmp2;
4909 
4910 	tmp = RREG32(UVD_CGC_CTRL);
4911 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4912 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
4913 
4914 	if (sw_mode) {
4915 		tmp &= ~0x7ffff800;
4916 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4917 	} else {
4918 		tmp |= 0x7ffff800;
4919 		tmp2 = 0;
4920 	}
4921 
4922 	WREG32(UVD_CGC_CTRL, tmp);
4923 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4924 }
4925 
4926 static void si_init_uvd_internal_cg(struct radeon_device *rdev)
4927 {
4928 	bool hw_mode = true;
4929 
4930 	if (hw_mode) {
4931 		si_set_uvd_dcm(rdev, false);
4932 	} else {
4933 		u32 tmp = RREG32(UVD_CGC_CTRL);
4934 		tmp &= ~DCM;
4935 		WREG32(UVD_CGC_CTRL, tmp);
4936 	}
4937 }
4938 
4939 static u32 si_halt_rlc(struct radeon_device *rdev)
4940 {
4941 	u32 data, orig;
4942 
4943 	orig = data = RREG32(RLC_CNTL);
4944 
4945 	if (data & RLC_ENABLE) {
4946 		data &= ~RLC_ENABLE;
4947 		WREG32(RLC_CNTL, data);
4948 
4949 		si_wait_for_rlc_serdes(rdev);
4950 	}
4951 
4952 	return orig;
4953 }
4954 
4955 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4956 {
4957 	u32 tmp;
4958 
4959 	tmp = RREG32(RLC_CNTL);
4960 	if (tmp != rlc)
4961 		WREG32(RLC_CNTL, rlc);
4962 }
4963 
4964 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4965 {
4966 	u32 data, orig;
4967 
4968 	orig = data = RREG32(DMA_PG);
4969 	if (enable)
4970 		data |= PG_CNTL_ENABLE;
4971 	else
4972 		data &= ~PG_CNTL_ENABLE;
4973 	if (orig != data)
4974 		WREG32(DMA_PG, data);
4975 }
4976 
4977 static void si_init_dma_pg(struct radeon_device *rdev)
4978 {
4979 	u32 tmp;
4980 
4981 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
4982 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4983 
4984 	for (tmp = 0; tmp < 5; tmp++)
4985 		WREG32(DMA_PGFSM_WRITE, 0);
4986 }
4987 
4988 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
4989 			       bool enable)
4990 {
4991 	u32 tmp;
4992 
4993 	if (enable) {
4994 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
4995 		WREG32(RLC_TTOP_D, tmp);
4996 
4997 		tmp = RREG32(RLC_PG_CNTL);
4998 		tmp |= GFX_PG_ENABLE;
4999 		WREG32(RLC_PG_CNTL, tmp);
5000 
5001 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5002 		tmp |= AUTO_PG_EN;
5003 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5004 	} else {
5005 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5006 		tmp &= ~AUTO_PG_EN;
5007 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5008 
5009 		tmp = RREG32(DB_RENDER_CONTROL);
5010 	}
5011 }
5012 
5013 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5014 {
5015 	u32 tmp;
5016 
5017 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5018 
5019 	tmp = RREG32(RLC_PG_CNTL);
5020 	tmp |= GFX_PG_SRC;
5021 	WREG32(RLC_PG_CNTL, tmp);
5022 
5023 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5024 
5025 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5026 
5027 	tmp &= ~GRBM_REG_SGIT_MASK;
5028 	tmp |= GRBM_REG_SGIT(0x700);
5029 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5030 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5031 }
5032 
5033 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5034 {
5035 	u32 mask = 0, tmp, tmp1;
5036 	int i;
5037 
5038 	si_select_se_sh(rdev, se, sh);
5039 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5040 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5041 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5042 
5043 	tmp &= 0xffff0000;
5044 
5045 	tmp |= tmp1;
5046 	tmp >>= 16;
5047 
5048 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
5049 		mask <<= 1;
5050 		mask |= 1;
5051 	}
5052 
5053 	return (~tmp) & mask;
5054 }
5055 
5056 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5057 {
5058 	u32 i, j, k, active_cu_number = 0;
5059 	u32 mask, counter, cu_bitmap;
5060 	u32 tmp = 0;
5061 
5062 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5063 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5064 			mask = 1;
5065 			cu_bitmap = 0;
5066 			counter = 0;
5067 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5068 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5069 					if (counter < 2)
5070 						cu_bitmap |= mask;
5071 					counter++;
5072 				}
5073 				mask <<= 1;
5074 			}
5075 
5076 			active_cu_number += counter;
5077 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5078 		}
5079 	}
5080 
5081 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5082 
5083 	tmp = RREG32(RLC_MAX_PG_CU);
5084 	tmp &= ~MAX_PU_CU_MASK;
5085 	tmp |= MAX_PU_CU(active_cu_number);
5086 	WREG32(RLC_MAX_PG_CU, tmp);
5087 }
5088 
5089 static void si_enable_cgcg(struct radeon_device *rdev,
5090 			   bool enable)
5091 {
5092 	u32 data, orig, tmp;
5093 
5094 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5095 
5096 	si_enable_gui_idle_interrupt(rdev, enable);
5097 
5098 	if (enable) {
5099 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5100 
5101 		tmp = si_halt_rlc(rdev);
5102 
5103 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5104 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5105 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5106 
5107 		si_wait_for_rlc_serdes(rdev);
5108 
5109 		si_update_rlc(rdev, tmp);
5110 
5111 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5112 
5113 		data |= CGCG_EN | CGLS_EN;
5114 	} else {
5115 		RREG32(CB_CGTT_SCLK_CTRL);
5116 		RREG32(CB_CGTT_SCLK_CTRL);
5117 		RREG32(CB_CGTT_SCLK_CTRL);
5118 		RREG32(CB_CGTT_SCLK_CTRL);
5119 
5120 		data &= ~(CGCG_EN | CGLS_EN);
5121 	}
5122 
5123 	if (orig != data)
5124 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5125 }
5126 
5127 static void si_enable_mgcg(struct radeon_device *rdev,
5128 			   bool enable)
5129 {
5130 	u32 data, orig, tmp = 0;
5131 
5132 	if (enable) {
5133 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5134 		data = 0x96940200;
5135 		if (orig != data)
5136 			WREG32(CGTS_SM_CTRL_REG, data);
5137 
5138 		orig = data = RREG32(CP_MEM_SLP_CNTL);
5139 		data |= CP_MEM_LS_EN;
5140 		if (orig != data)
5141 			WREG32(CP_MEM_SLP_CNTL, data);
5142 
5143 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5144 		data &= 0xffffffc0;
5145 		if (orig != data)
5146 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5147 
5148 		tmp = si_halt_rlc(rdev);
5149 
5150 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5151 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5152 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5153 
5154 		si_update_rlc(rdev, tmp);
5155 	} else {
5156 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5157 		data |= 0x00000003;
5158 		if (orig != data)
5159 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5160 
5161 		data = RREG32(CP_MEM_SLP_CNTL);
5162 		if (data & CP_MEM_LS_EN) {
5163 			data &= ~CP_MEM_LS_EN;
5164 			WREG32(CP_MEM_SLP_CNTL, data);
5165 		}
5166 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5167 		data |= LS_OVERRIDE | OVERRIDE;
5168 		if (orig != data)
5169 			WREG32(CGTS_SM_CTRL_REG, data);
5170 
5171 		tmp = si_halt_rlc(rdev);
5172 
5173 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5174 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5175 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5176 
5177 		si_update_rlc(rdev, tmp);
5178 	}
5179 }
5180 
5181 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5182 			       bool enable)
5183 {
5184 	u32 orig, data, tmp;
5185 
5186 	if (enable) {
5187 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5188 		tmp |= 0x3fff;
5189 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5190 
5191 		orig = data = RREG32(UVD_CGC_CTRL);
5192 		data |= DCM;
5193 		if (orig != data)
5194 			WREG32(UVD_CGC_CTRL, data);
5195 
5196 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5197 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5198 	} else {
5199 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5200 		tmp &= ~0x3fff;
5201 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5202 
5203 		orig = data = RREG32(UVD_CGC_CTRL);
5204 		data &= ~DCM;
5205 		if (orig != data)
5206 			WREG32(UVD_CGC_CTRL, data);
5207 
5208 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5209 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5210 	}
5211 }
5212 
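/* memory controller hub registers that share a common light sleep enable bit */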
5213 static const u32 mc_cg_registers[] =
5214 {
5215 	MC_HUB_MISC_HUB_CG,
5216 	MC_HUB_MISC_SIP_CG,
5217 	MC_HUB_MISC_VM_CG,
5218 	MC_XPB_CLK_GAT,
5219 	ATC_MISC_CG,
5220 	MC_CITF_MISC_WR_CG,
5221 	MC_CITF_MISC_RD_CG,
5222 	MC_CITF_MISC_VM_CG,
5223 	VM_L2_CG,
5224 };
5225 
5226 static void si_enable_mc_ls(struct radeon_device *rdev,
5227 			    bool enable)
5228 {
5229 	int i;
5230 	u32 orig, data;
5231 
5232 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5233 		orig = data = RREG32(mc_cg_registers[i]);
5234 		if (enable)
5235 			data |= MC_LS_ENABLE;
5236 		else
5237 			data &= ~MC_LS_ENABLE;
5238 		if (data != orig)
5239 			WREG32(mc_cg_registers[i], data);
5240 	}
5241 }
5242 
5243 
5244 static void si_init_cg(struct radeon_device *rdev)
5245 {
5246 	si_enable_mgcg(rdev, true);
5247 	si_enable_cgcg(rdev, false);
5248 	/* disable MC LS on Tahiti */
5249 	if (rdev->family == CHIP_TAHITI)
5250 		si_enable_mc_ls(rdev, false);
5251 	if (rdev->has_uvd) {
5252 		si_enable_uvd_mgcg(rdev, true);
5253 		si_init_uvd_internal_cg(rdev);
5254 	}
5255 }
5256 
5257 static void si_fini_cg(struct radeon_device *rdev)
5258 {
5259 	if (rdev->has_uvd)
5260 		si_enable_uvd_mgcg(rdev, false);
5261 	si_enable_cgcg(rdev, false);
5262 	si_enable_mgcg(rdev, false);
5263 }
5264 
5265 static void si_init_pg(struct radeon_device *rdev)
5266 {
5267 	bool has_pg = false;
5268 #if 0
5269 	/* only cape verde supports PG */
5270 	if (rdev->family == CHIP_VERDE)
5271 		has_pg = true;
5272 #endif
5273 	if (has_pg) {
5274 		si_init_ao_cu_mask(rdev);
5275 		si_init_dma_pg(rdev);
5276 		si_enable_dma_pg(rdev, true);
5277 		si_init_gfx_cgpg(rdev);
5278 		si_enable_gfx_cgpg(rdev, true);
5279 	} else {
5280 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5281 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5282 	}
5283 }
5284 
5285 static void si_fini_pg(struct radeon_device *rdev)
5286 {
5287 	bool has_pg = false;
5288 
5289 	/* only cape verde supports PG */
5290 	if (rdev->family == CHIP_VERDE)
5291 		has_pg = true;
5292 
5293 	if (has_pg) {
5294 		si_enable_dma_pg(rdev, false);
5295 		si_enable_gfx_cgpg(rdev, false);
5296 	}
5297 }
5298 
5299 /*
5300  * RLC
5301  */
5302 void si_rlc_fini(struct radeon_device *rdev)
5303 {
5304 	int r;
5305 
5306 	/* save restore block */
5307 	if (rdev->rlc.save_restore_obj) {
5308 		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
5309 		if (unlikely(r != 0))
5310 			dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
5311 		radeon_bo_unpin(rdev->rlc.save_restore_obj);
5312 		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5313 
5314 		radeon_bo_unref(&rdev->rlc.save_restore_obj);
5315 		rdev->rlc.save_restore_obj = NULL;
5316 	}
5317 
5318 	/* clear state block */
5319 	if (rdev->rlc.clear_state_obj) {
5320 		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
5321 		if (unlikely(r != 0))
5322 			dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
5323 		radeon_bo_unpin(rdev->rlc.clear_state_obj);
5324 		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5325 
5326 		radeon_bo_unref(&rdev->rlc.clear_state_obj);
5327 		rdev->rlc.clear_state_obj = NULL;
5328 	}
5329 }
5330 
5331 #define RLC_CLEAR_STATE_END_MARKER          0x00000001
5332 
5333 int si_rlc_init(struct radeon_device *rdev)
5334 {
5335 	volatile u32 *dst_ptr;
5336 	void *ptr;
5337 	u32 dws, data, i, j, k, reg_num;
5338 	u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index;
5339 	u64 reg_list_mc_addr;
5340 	const struct cs_section_def *cs_data = si_cs_data;
5341 	int r;
5342 
5343 	/* save restore block */
5344 	if (rdev->rlc.save_restore_obj == NULL) {
5345 		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
5346 				     RADEON_GEM_DOMAIN_VRAM, NULL,
5347 				     &rdev->rlc.save_restore_obj);
5348 		if (r) {
5349 			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
5350 			return r;
5351 		}
5352 	}
5353 
5354 	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
5355 	if (unlikely(r != 0)) {
5356 		si_rlc_fini(rdev);
5357 		return r;
5358 	}
5359 	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
5360 			  &rdev->rlc.save_restore_gpu_addr);
5361 	if (r) {
5362 		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5363 		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
5364 		si_rlc_fini(rdev);
5365 		return r;
5366 	}
5367 
5368 	if (rdev->family == CHIP_VERDE) {
5369 		r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&ptr);
5370 		if (r) {
5371 			dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
5372 			si_rlc_fini(rdev);
5373 			return r;
5374 		}
5375 		rdev->rlc.sr_ptr = ptr;
5376 		/* write the sr buffer */
5377 		dst_ptr = rdev->rlc.sr_ptr;
5378 		for (i = 0; i < ARRAY_SIZE(verde_rlc_save_restore_register_list); i++) {
5379 			dst_ptr[i] = verde_rlc_save_restore_register_list[i];
5380 		}
5381 		radeon_bo_kunmap(rdev->rlc.save_restore_obj);
5382 	}
5383 	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
5384 
5385 	/* clear state block */
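	/* Size the buffer first: each register list needs a 3-dword header
	 * (MC address low, register offset, length), plus one leading dword
	 * for the upper MC address bits and one trailing end marker; that
	 * is where the 3 * reg_list_num + 2 below comes from.
	 */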
5386 	reg_list_num = 0;
5387 	dws = 0;
5388 	for (i = 0; cs_data[i].section != NULL; i++) {
5389 		for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
5390 			reg_list_num++;
5391 			dws += cs_data[i].section[j].reg_count;
5392 		}
5393 	}
5394 	reg_list_blk_index = (3 * reg_list_num + 2);
5395 	dws += reg_list_blk_index;
5396 
5397 	if (rdev->rlc.clear_state_obj == NULL) {
5398 		r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
5399 				     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
5400 		if (r) {
5401 			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
5402 			si_rlc_fini(rdev);
5403 			return r;
5404 		}
5405 	}
5406 	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
5407 	if (unlikely(r != 0)) {
5408 		si_rlc_fini(rdev);
5409 		return r;
5410 	}
5411 	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
5412 			  &rdev->rlc.clear_state_gpu_addr);
5413 	if (r) {
5414 
5415 		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5416 		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
5417 		si_rlc_fini(rdev);
5418 		return r;
5419 	}
5420 	r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&ptr);
5421 	if (r) {
5422 		dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r);
5423 		si_rlc_fini(rdev);
5424 		return r;
5425 	}
5426 	rdev->rlc.cs_ptr = ptr;
5427 	/* set up the cs buffer */
5428 	dst_ptr = rdev->rlc.cs_ptr;
5429 	reg_list_hdr_blk_index = 0;
5430 	reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4);
5431 	data = upper_32_bits(reg_list_mc_addr);
5432 	dst_ptr[reg_list_hdr_blk_index] = data;
5433 	reg_list_hdr_blk_index++;
5434 	for (i = 0; cs_data[i].section != NULL; i++) {
5435 		for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
5436 			reg_num = cs_data[i].section[j].reg_count;
5437 			data = reg_list_mc_addr & 0xffffffff;
5438 			dst_ptr[reg_list_hdr_blk_index] = data;
5439 			reg_list_hdr_blk_index++;
5440 
5441 			data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff;
5442 			dst_ptr[reg_list_hdr_blk_index] = data;
5443 			reg_list_hdr_blk_index++;
5444 
5445 			data = 0x08000000 | (reg_num * 4);
5446 			dst_ptr[reg_list_hdr_blk_index] = data;
5447 			reg_list_hdr_blk_index++;
5448 
5449 			for (k = 0; k < reg_num; k++) {
5450 				data = cs_data[i].section[j].extent[k];
5451 				dst_ptr[reg_list_blk_index + k] = data;
5452 			}
5453 			reg_list_mc_addr += reg_num * 4;
5454 			reg_list_blk_index += reg_num;
5455 		}
5456 	}
5457 	dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER;
5458 
5459 	radeon_bo_kunmap(rdev->rlc.clear_state_obj);
5460 	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
5461 
5462 	return 0;
5463 }
5464 
5465 static void si_rlc_reset(struct radeon_device *rdev)
5466 {
5467 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5468 
5469 	tmp |= SOFT_RESET_RLC;
5470 	WREG32(GRBM_SOFT_RESET, tmp);
5471 	udelay(50);
5472 	tmp &= ~SOFT_RESET_RLC;
5473 	WREG32(GRBM_SOFT_RESET, tmp);
5474 	udelay(50);
5475 }
5476 
5477 static void si_rlc_stop(struct radeon_device *rdev)
5478 {
5479 	WREG32(RLC_CNTL, 0);
5480 
5481 	si_enable_gui_idle_interrupt(rdev, false);
5482 
5483 	si_wait_for_rlc_serdes(rdev);
5484 }
5485 
5486 static void si_rlc_start(struct radeon_device *rdev)
5487 {
5488 	WREG32(RLC_CNTL, RLC_ENABLE);
5489 
5490 	si_enable_gui_idle_interrupt(rdev, true);
5491 
5492 	udelay(50);
5493 }
5494 
5495 static bool si_lbpw_supported(struct radeon_device *rdev)
5496 {
5497 	u32 tmp;
5498 
5499 	/* Enable LBPW only for DDR3 */
5500 	tmp = RREG32(MC_SEQ_MISC0);
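	/* MC_SEQ_MISC0[31:28] holds the memory type; 0xB appears to mean DDR3 */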
5501 	if ((tmp & 0xF0000000) == 0xB0000000)
5502 		return true;
5503 	return false;
5504 }
5505 
5506 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5507 {
5508 	u32 tmp;
5509 
5510 	tmp = RREG32(RLC_LB_CNTL);
5511 	if (enable)
5512 		tmp |= LOAD_BALANCE_ENABLE;
5513 	else
5514 		tmp &= ~LOAD_BALANCE_ENABLE;
5515 	WREG32(RLC_LB_CNTL, tmp);
5516 
5517 	if (!enable) {
5518 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5519 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5520 	}
5521 }
5522 
5523 static int si_rlc_resume(struct radeon_device *rdev)
5524 {
5525 	u32 i;
5526 	const __be32 *fw_data;
5527 
5528 	if (!rdev->rlc_fw)
5529 		return -EINVAL;
5530 
5531 	si_rlc_stop(rdev);
5532 
5533 	si_rlc_reset(rdev);
5534 
5535 	si_init_pg(rdev);
5536 
5537 	si_init_cg(rdev);
5538 
5539 	WREG32(RLC_RL_BASE, 0);
5540 	WREG32(RLC_RL_SIZE, 0);
5541 	WREG32(RLC_LB_CNTL, 0);
5542 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5543 	WREG32(RLC_LB_CNTR_INIT, 0);
5544 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5545 
5546 	WREG32(RLC_MC_CNTL, 0);
5547 	WREG32(RLC_UCODE_CNTL, 0);
5548 
5549 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5550 	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5551 		WREG32(RLC_UCODE_ADDR, i);
5552 		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5553 	}
5554 	WREG32(RLC_UCODE_ADDR, 0);
5555 
5556 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5557 
5558 	si_rlc_start(rdev);
5559 
5560 	return 0;
5561 }
5562 
5563 static void si_enable_interrupts(struct radeon_device *rdev)
5564 {
5565 	u32 ih_cntl = RREG32(IH_CNTL);
5566 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5567 
5568 	ih_cntl |= ENABLE_INTR;
5569 	ih_rb_cntl |= IH_RB_ENABLE;
5570 	WREG32(IH_CNTL, ih_cntl);
5571 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5572 	rdev->ih.enabled = true;
5573 }
5574 
5575 static void si_disable_interrupts(struct radeon_device *rdev)
5576 {
5577 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5578 	u32 ih_cntl = RREG32(IH_CNTL);
5579 
5580 	ih_rb_cntl &= ~IH_RB_ENABLE;
5581 	ih_cntl &= ~ENABLE_INTR;
5582 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5583 	WREG32(IH_CNTL, ih_cntl);
5584 	/* set rptr, wptr to 0 */
5585 	WREG32(IH_RB_RPTR, 0);
5586 	WREG32(IH_RB_WPTR, 0);
5587 	rdev->ih.enabled = false;
5588 	rdev->ih.rptr = 0;
5589 }
5590 
5591 static void si_disable_interrupt_state(struct radeon_device *rdev)
5592 {
5593 	u32 tmp;
5594 
5595 	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5596 	WREG32(CP_INT_CNTL_RING1, 0);
5597 	WREG32(CP_INT_CNTL_RING2, 0);
5598 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5599 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5600 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5601 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5602 	WREG32(GRBM_INT_CNTL, 0);
5603 	if (rdev->num_crtc >= 2) {
5604 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5605 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5606 	}
5607 	if (rdev->num_crtc >= 4) {
5608 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5609 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5610 	}
5611 	if (rdev->num_crtc >= 6) {
5612 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5613 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5614 	}
5615 
5616 	if (rdev->num_crtc >= 2) {
5617 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5618 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5619 	}
5620 	if (rdev->num_crtc >= 4) {
5621 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5622 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5623 	}
5624 	if (rdev->num_crtc >= 6) {
5625 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5626 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5627 	}
5628 
5629 	if (!ASIC_IS_NODCE(rdev)) {
5630 		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);
5631 
5632 		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5633 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5634 		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5635 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5636 		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5637 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5638 		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5639 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5640 		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5641 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5642 		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5643 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5644 	}
5645 }
5646 
5647 static int si_irq_init(struct radeon_device *rdev)
5648 {
5649 	int ret = 0;
5650 	int rb_bufsz;
5651 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5652 
5653 	/* allocate ring */
5654 	ret = r600_ih_ring_alloc(rdev);
5655 	if (ret)
5656 		return ret;
5657 
5658 	/* disable irqs */
5659 	si_disable_interrupts(rdev);
5660 
5661 	/* init rlc */
5662 	ret = si_rlc_resume(rdev);
5663 	if (ret) {
5664 		r600_ih_ring_fini(rdev);
5665 		return ret;
5666 	}
5667 
5668 	/* setup interrupt control */
5669 	/* set dummy read address to ring address */
5670 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5671 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
5672 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5673 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5674 	 */
5675 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5676 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5677 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5678 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
5679 
5680 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5681 	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
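	/* rb_bufsz is log2 of the ring size in dwords; IH_RB_CNTL takes it
	 * in the size field starting at bit 1, hence the shift below
	 */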
5682 
5683 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5684 		      IH_WPTR_OVERFLOW_CLEAR |
5685 		      (rb_bufsz << 1));
5686 
5687 	if (rdev->wb.enabled)
5688 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5689 
5690 	/* set the writeback address whether it's enabled or not */
5691 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5692 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5693 
5694 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5695 
5696 	/* set rptr, wptr to 0 */
5697 	WREG32(IH_RB_RPTR, 0);
5698 	WREG32(IH_RB_WPTR, 0);
5699 
5700 	/* Default settings for IH_CNTL (disabled at first) */
5701 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5702 	/* RPTR_REARM only works if msi's are enabled */
5703 	if (rdev->msi_enabled)
5704 		ih_cntl |= RPTR_REARM;
5705 	WREG32(IH_CNTL, ih_cntl);
5706 
5707 	/* force the active interrupt state to all disabled */
5708 	si_disable_interrupt_state(rdev);
5709 
5710 	pci_enable_busmaster(rdev->dev);
5711 
5712 	/* enable irqs */
5713 	si_enable_interrupts(rdev);
5714 
5715 	return ret;
5716 }
5717 
5718 int si_irq_set(struct radeon_device *rdev)
5719 {
5720 	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
5721 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5722 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5723 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5724 	u32 grbm_int_cntl = 0;
5725 	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
5726 	u32 dma_cntl, dma_cntl1;
5727 	u32 thermal_int = 0;
5728 
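	/* Stage every enable bit in locals first; the hardware registers
	 * are only written in one burst near the end of the function.
	 */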
5729 	if (!rdev->irq.installed) {
5730 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5731 		return -EINVAL;
5732 	}
5733 	/* don't enable anything if the ih is disabled */
5734 	if (!rdev->ih.enabled) {
5735 		si_disable_interrupts(rdev);
5736 		/* force the active interrupt state to all disabled */
5737 		si_disable_interrupt_state(rdev);
5738 		return 0;
5739 	}
5740 
5741 	if (!ASIC_IS_NODCE(rdev)) {
5742 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5743 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5744 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5745 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5746 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5747 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5748 	}
5749 
5750 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5751 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5752 
5753 	thermal_int = RREG32(CG_THERMAL_INT) &
5754 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5755 
5756 	/* enable CP interrupts on all rings */
5757 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5758 		DRM_DEBUG("si_irq_set: sw int gfx\n");
5759 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5760 	}
5761 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5762 		DRM_DEBUG("si_irq_set: sw int cp1\n");
5763 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5764 	}
5765 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5766 		DRM_DEBUG("si_irq_set: sw int cp2\n");
5767 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5768 	}
5769 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5770 		DRM_DEBUG("si_irq_set: sw int dma\n");
5771 		dma_cntl |= TRAP_ENABLE;
5772 	}
5773 
5774 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5775 		DRM_DEBUG("si_irq_set: sw int dma1\n");
5776 		dma_cntl1 |= TRAP_ENABLE;
5777 	}
5778 	if (rdev->irq.crtc_vblank_int[0] ||
5779 	    atomic_read(&rdev->irq.pflip[0])) {
5780 		DRM_DEBUG("si_irq_set: vblank 0\n");
5781 		crtc1 |= VBLANK_INT_MASK;
5782 	}
5783 	if (rdev->irq.crtc_vblank_int[1] ||
5784 	    atomic_read(&rdev->irq.pflip[1])) {
5785 		DRM_DEBUG("si_irq_set: vblank 1\n");
5786 		crtc2 |= VBLANK_INT_MASK;
5787 	}
5788 	if (rdev->irq.crtc_vblank_int[2] ||
5789 	    atomic_read(&rdev->irq.pflip[2])) {
5790 		DRM_DEBUG("si_irq_set: vblank 2\n");
5791 		crtc3 |= VBLANK_INT_MASK;
5792 	}
5793 	if (rdev->irq.crtc_vblank_int[3] ||
5794 	    atomic_read(&rdev->irq.pflip[3])) {
5795 		DRM_DEBUG("si_irq_set: vblank 3\n");
5796 		crtc4 |= VBLANK_INT_MASK;
5797 	}
5798 	if (rdev->irq.crtc_vblank_int[4] ||
5799 	    atomic_read(&rdev->irq.pflip[4])) {
5800 		DRM_DEBUG("si_irq_set: vblank 4\n");
5801 		crtc5 |= VBLANK_INT_MASK;
5802 	}
5803 	if (rdev->irq.crtc_vblank_int[5] ||
5804 	    atomic_read(&rdev->irq.pflip[5])) {
5805 		DRM_DEBUG("si_irq_set: vblank 5\n");
5806 		crtc6 |= VBLANK_INT_MASK;
5807 	}
5808 	if (rdev->irq.hpd[0]) {
5809 		DRM_DEBUG("si_irq_set: hpd 1\n");
5810 		hpd1 |= DC_HPDx_INT_EN;
5811 	}
5812 	if (rdev->irq.hpd[1]) {
5813 		DRM_DEBUG("si_irq_set: hpd 2\n");
5814 		hpd2 |= DC_HPDx_INT_EN;
5815 	}
5816 	if (rdev->irq.hpd[2]) {
5817 		DRM_DEBUG("si_irq_set: hpd 3\n");
5818 		hpd3 |= DC_HPDx_INT_EN;
5819 	}
5820 	if (rdev->irq.hpd[3]) {
5821 		DRM_DEBUG("si_irq_set: hpd 4\n");
5822 		hpd4 |= DC_HPDx_INT_EN;
5823 	}
5824 	if (rdev->irq.hpd[4]) {
5825 		DRM_DEBUG("si_irq_set: hpd 5\n");
5826 		hpd5 |= DC_HPDx_INT_EN;
5827 	}
5828 	if (rdev->irq.hpd[5]) {
5829 		DRM_DEBUG("si_irq_set: hpd 6\n");
5830 		hpd6 |= DC_HPDx_INT_EN;
5831 	}
5832 
5833 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5834 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5835 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5836 
5837 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5838 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5839 
5840 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5841 
5842 	if (rdev->irq.dpm_thermal) {
5843 		DRM_DEBUG("dpm thermal\n");
5844 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5845 	}
5846 
5847 	if (rdev->num_crtc >= 2) {
5848 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5849 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5850 	}
5851 	if (rdev->num_crtc >= 4) {
5852 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5853 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5854 	}
5855 	if (rdev->num_crtc >= 6) {
5856 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5857 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5858 	}
5859 
5860 	if (rdev->num_crtc >= 2) {
5861 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
5862 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
5863 	}
5864 	if (rdev->num_crtc >= 4) {
5865 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
5866 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
5867 	}
5868 	if (rdev->num_crtc >= 6) {
5869 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
5870 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
5871 	}
5872 
5873 	if (!ASIC_IS_NODCE(rdev)) {
5874 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
5875 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
5876 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
5877 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
5878 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
5879 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
5880 	}
5881 
5882 	WREG32(CG_THERMAL_INT, thermal_int);
5883 
5884 	return 0;
5885 }
5886 
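/*
 * Latch the display interrupt status registers and acknowledge every
 * source that fired (page flip, vblank, vline, hotplug) so the state
 * is clean before the IH ring is walked.
 */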
5887 static inline void si_irq_ack(struct radeon_device *rdev)
5888 {
5889 	u32 tmp;
5890 
5891 	if (ASIC_IS_NODCE(rdev))
5892 		return;
5893 
5894 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5895 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5896 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5897 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5898 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5899 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5900 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5901 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5902 	if (rdev->num_crtc >= 4) {
5903 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5904 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5905 	}
5906 	if (rdev->num_crtc >= 6) {
5907 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5908 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5909 	}
5910 
5911 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5912 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5913 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5914 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5915 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5916 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5917 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5918 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5919 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5920 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5921 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5922 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5923 
5924 	if (rdev->num_crtc >= 4) {
5925 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5926 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5927 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5928 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5929 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5930 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5931 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5932 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5933 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5934 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5935 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5936 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5937 	}
5938 
5939 	if (rdev->num_crtc >= 6) {
5940 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5941 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5942 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5943 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5944 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5945 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5946 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5947 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5948 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5949 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5950 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5951 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5952 	}
5953 
5954 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5955 		tmp = RREG32(DC_HPD1_INT_CONTROL);
5956 		tmp |= DC_HPDx_INT_ACK;
5957 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5958 	}
5959 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5960 		tmp = RREG32(DC_HPD2_INT_CONTROL);
5961 		tmp |= DC_HPDx_INT_ACK;
5962 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5963 	}
5964 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5965 		tmp = RREG32(DC_HPD3_INT_CONTROL);
5966 		tmp |= DC_HPDx_INT_ACK;
5967 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5968 	}
5969 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5970 		tmp = RREG32(DC_HPD4_INT_CONTROL);
5971 		tmp |= DC_HPDx_INT_ACK;
5972 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5973 	}
5974 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5975 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5976 		tmp |= DC_HPDx_INT_ACK;
5977 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5978 	}
5979 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5980 		tmp = RREG32(DC_HPD6_INT_CONTROL);
5981 		tmp |= DC_HPDx_INT_ACK;
5982 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5983 	}
5984 }
5985 
5986 static void si_irq_disable(struct radeon_device *rdev)
5987 {
5988 	si_disable_interrupts(rdev);
5989 	/* Wait and acknowledge irq */
5990 	mdelay(1);
5991 	si_irq_ack(rdev);
5992 	si_disable_interrupt_state(rdev);
5993 }
5994 
5995 static void si_irq_suspend(struct radeon_device *rdev)
5996 {
5997 	si_irq_disable(rdev);
5998 	si_rlc_stop(rdev);
5999 }
6000 
6001 static void si_irq_fini(struct radeon_device *rdev)
6002 {
6003 	si_irq_suspend(rdev);
6004 	r600_ih_ring_fini(rdev);
6005 }
6006 
6007 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6008 {
6009 	u32 wptr, tmp;
6010 
6011 	if (rdev->wb.enabled)
6012 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6013 	else
6014 		wptr = RREG32(IH_RB_WPTR);
6015 
6016 	if (wptr & RB_OVERFLOW) {
6017 		/* When a ring buffer overflow happens, start parsing interrupts
6018 		 * from the last vector that was not overwritten (wptr + 16).
6019 		 * Hopefully this allows us to catch up.
6020 		 */
6021 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6022 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6023 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6024 		tmp = RREG32(IH_RB_CNTL);
6025 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6026 		WREG32(IH_RB_CNTL, tmp);
6027 	}
6028 	return (wptr & rdev->ih.ptr_mask);
6029 }
6030 
6031 /*        SI IV Ring
6032  * Each IV ring entry is 128 bits:
6033  * [7:0]    - interrupt source id
6034  * [31:8]   - reserved
6035  * [59:32]  - interrupt source data
6036  * [63:60]  - reserved
6037  * [71:64]  - RINGID
6038  * [79:72]  - VMID
6039  * [127:80] - reserved
6040  */
6041 irqreturn_t si_irq_process(struct radeon_device *rdev)
6042 {
6043 	u32 wptr;
6044 	u32 rptr;
6045 	u32 src_id, src_data, ring_id;
6046 	u32 ring_index;
6047 	bool queue_hotplug = false;
6048 	bool queue_thermal = false;
6049 	u32 status, addr;
6050 
6051 	if (!rdev->ih.enabled || rdev->shutdown)
6052 		return IRQ_NONE;
6053 
6054 	wptr = si_get_ih_wptr(rdev);
6055 
6056 restart_ih:
6057 	/* is somebody else already processing irqs? */
6058 	if (atomic_xchg(&rdev->ih.lock, 1))
6059 		return IRQ_NONE;
6060 
6061 	rptr = rdev->ih.rptr;
6062 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6063 
6064 	/* Order reading of wptr vs. reading of IH ring data */
6065 	rmb();
6066 
6067 	/* display interrupts */
6068 	si_irq_ack(rdev);
6069 
6070 	while (rptr != wptr) {
6071 		/* wptr/rptr are in bytes! */
6072 		ring_index = rptr / 4;
6073 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6074 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6075 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6076 
6077 		switch (src_id) {
6078 		case 1: /* D1 vblank/vline */
6079 			switch (src_data) {
6080 			case 0: /* D1 vblank */
6081 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
6082 					if (rdev->irq.crtc_vblank_int[0]) {
6083 						drm_handle_vblank(rdev->ddev, 0);
6084 						rdev->pm.vblank_sync = true;
6085 						wake_up(&rdev->irq.vblank_queue);
6086 					}
6087 					if (atomic_read(&rdev->irq.pflip[0]))
6088 						radeon_crtc_handle_flip(rdev, 0);
6089 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6090 					DRM_DEBUG("IH: D1 vblank\n");
6091 				}
6092 				break;
6093 			case 1: /* D1 vline */
6094 				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
6095 					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6096 					DRM_DEBUG("IH: D1 vline\n");
6097 				}
6098 				break;
6099 			default:
6100 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6101 				break;
6102 			}
6103 			break;
6104 		case 2: /* D2 vblank/vline */
6105 			switch (src_data) {
6106 			case 0: /* D2 vblank */
6107 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6108 					if (rdev->irq.crtc_vblank_int[1]) {
6109 						drm_handle_vblank(rdev->ddev, 1);
6110 						rdev->pm.vblank_sync = true;
6111 						wake_up(&rdev->irq.vblank_queue);
6112 					}
6113 					if (atomic_read(&rdev->irq.pflip[1]))
6114 						radeon_crtc_handle_flip(rdev, 1);
6115 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6116 					DRM_DEBUG("IH: D2 vblank\n");
6117 				}
6118 				break;
6119 			case 1: /* D2 vline */
6120 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6121 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6122 					DRM_DEBUG("IH: D2 vline\n");
6123 				}
6124 				break;
6125 			default:
6126 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6127 				break;
6128 			}
6129 			break;
6130 		case 3: /* D3 vblank/vline */
6131 			switch (src_data) {
6132 			case 0: /* D3 vblank */
6133 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6134 					if (rdev->irq.crtc_vblank_int[2]) {
6135 						drm_handle_vblank(rdev->ddev, 2);
6136 						rdev->pm.vblank_sync = true;
6137 						wake_up(&rdev->irq.vblank_queue);
6138 					}
6139 					if (atomic_read(&rdev->irq.pflip[2]))
6140 						radeon_crtc_handle_flip(rdev, 2);
6141 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6142 					DRM_DEBUG("IH: D3 vblank\n");
6143 				}
6144 				break;
6145 			case 1: /* D3 vline */
6146 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6147 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6148 					DRM_DEBUG("IH: D3 vline\n");
6149 				}
6150 				break;
6151 			default:
6152 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6153 				break;
6154 			}
6155 			break;
6156 		case 4: /* D4 vblank/vline */
6157 			switch (src_data) {
6158 			case 0: /* D4 vblank */
6159 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6160 					if (rdev->irq.crtc_vblank_int[3]) {
6161 						drm_handle_vblank(rdev->ddev, 3);
6162 						rdev->pm.vblank_sync = true;
6163 						wake_up(&rdev->irq.vblank_queue);
6164 					}
6165 					if (atomic_read(&rdev->irq.pflip[3]))
6166 						radeon_crtc_handle_flip(rdev, 3);
6167 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6168 					DRM_DEBUG("IH: D4 vblank\n");
6169 				}
6170 				break;
6171 			case 1: /* D4 vline */
6172 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6173 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6174 					DRM_DEBUG("IH: D4 vline\n");
6175 				}
6176 				break;
6177 			default:
6178 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6179 				break;
6180 			}
6181 			break;
6182 		case 5: /* D5 vblank/vline */
6183 			switch (src_data) {
6184 			case 0: /* D5 vblank */
6185 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6186 					if (rdev->irq.crtc_vblank_int[4]) {
6187 						drm_handle_vblank(rdev->ddev, 4);
6188 						rdev->pm.vblank_sync = true;
6189 						wake_up(&rdev->irq.vblank_queue);
6190 					}
6191 					if (atomic_read(&rdev->irq.pflip[4]))
6192 						radeon_crtc_handle_flip(rdev, 4);
6193 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6194 					DRM_DEBUG("IH: D5 vblank\n");
6195 				}
6196 				break;
6197 			case 1: /* D5 vline */
6198 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6199 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6200 					DRM_DEBUG("IH: D5 vline\n");
6201 				}
6202 				break;
6203 			default:
6204 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6205 				break;
6206 			}
6207 			break;
6208 		case 6: /* D6 vblank/vline */
6209 			switch (src_data) {
6210 			case 0: /* D6 vblank */
6211 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6212 					if (rdev->irq.crtc_vblank_int[5]) {
6213 						drm_handle_vblank(rdev->ddev, 5);
6214 						rdev->pm.vblank_sync = true;
6215 						wake_up(&rdev->irq.vblank_queue);
6216 					}
6217 					if (atomic_read(&rdev->irq.pflip[5]))
6218 						radeon_crtc_handle_flip(rdev, 5);
6219 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6220 					DRM_DEBUG("IH: D6 vblank\n");
6221 				}
6222 				break;
6223 			case 1: /* D6 vline */
6224 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6225 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6226 					DRM_DEBUG("IH: D6 vline\n");
6227 				}
6228 				break;
6229 			default:
6230 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6231 				break;
6232 			}
6233 			break;
6234 		case 42: /* HPD hotplug */
6235 			switch (src_data) {
6236 			case 0:
6237 				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6238 					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6239 					queue_hotplug = true;
6240 					DRM_DEBUG("IH: HPD1\n");
6241 				}
6242 				break;
6243 			case 1:
6244 				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6245 					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6246 					queue_hotplug = true;
6247 					DRM_DEBUG("IH: HPD2\n");
6248 				}
6249 				break;
6250 			case 2:
6251 				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6252 					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6253 					queue_hotplug = true;
6254 					DRM_DEBUG("IH: HPD3\n");
6255 				}
6256 				break;
6257 			case 3:
6258 				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6259 					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6260 					queue_hotplug = true;
6261 					DRM_DEBUG("IH: HPD4\n");
6262 				}
6263 				break;
6264 			case 4:
6265 				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6266 					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6267 					queue_hotplug = true;
6268 					DRM_DEBUG("IH: HPD5\n");
6269 				}
6270 				break;
6271 			case 5:
6272 				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6273 					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6274 					queue_hotplug = true;
6275 					DRM_DEBUG("IH: HPD6\n");
6276 				}
6277 				break;
6278 			default:
6279 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6280 				break;
6281 			}
6282 			break;
6283 		case 146:
6284 		case 147:
6285 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6286 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6287 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6288 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6289 				addr);
6290 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6291 				status);
6292 			si_vm_decode_fault(rdev, status, addr);
6293 			/* reset addr and status */
6294 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6295 			break;
6296 		case 176: /* RINGID0 CP_INT */
6297 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6298 			break;
6299 		case 177: /* RINGID1 CP_INT */
6300 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6301 			break;
6302 		case 178: /* RINGID2 CP_INT */
6303 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6304 			break;
6305 		case 181: /* CP EOP event */
6306 			DRM_DEBUG("IH: CP EOP\n");
6307 			switch (ring_id) {
6308 			case 0:
6309 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6310 				break;
6311 			case 1:
6312 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6313 				break;
6314 			case 2:
6315 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6316 				break;
6317 			}
6318 			break;
6319 		case 224: /* DMA trap event */
6320 			DRM_DEBUG("IH: DMA trap\n");
6321 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6322 			break;
6323 		case 230: /* thermal low to high */
6324 			DRM_DEBUG("IH: thermal low to high\n");
6325 			rdev->pm.dpm.thermal.high_to_low = false;
6326 			queue_thermal = true;
6327 			break;
6328 		case 231: /* thermal high to low */
6329 			DRM_DEBUG("IH: thermal high to low\n");
6330 			rdev->pm.dpm.thermal.high_to_low = true;
6331 			queue_thermal = true;
6332 			break;
6333 		case 233: /* GUI IDLE */
6334 			DRM_DEBUG("IH: GUI idle\n");
6335 			break;
6336 		case 244: /* DMA trap event */
6337 			DRM_DEBUG("IH: DMA1 trap\n");
6338 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6339 			break;
6340 		default:
6341 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6342 			break;
6343 		}
6344 
6345 		/* wptr/rptr are in bytes! */
6346 		rptr += 16;
6347 		rptr &= rdev->ih.ptr_mask;
6348 	}
6349 	if (queue_hotplug)
6350 		taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
6351 	if (queue_thermal && rdev->pm.dpm_enabled)
6352 		taskqueue_enqueue(rdev->tq, &rdev->pm.dpm.thermal.work);
6353 	rdev->ih.rptr = rptr;
6354 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
6355 	atomic_set(&rdev->ih.lock, 0);
6356 
6357 	/* make sure wptr hasn't changed while processing */
6358 	wptr = si_get_ih_wptr(rdev);
6359 	if (wptr != rptr)
6360 		goto restart_ih;
6361 
6362 	return IRQ_HANDLED;
6363 }
6364 
6365 /**
6366  * si_copy_dma - copy pages using the DMA engine
6367  *
6368  * @rdev: radeon_device pointer
6369  * @src_offset: src GPU address
6370  * @dst_offset: dst GPU address
6371  * @num_gpu_pages: number of GPU pages to xfer
6372  * @fence: radeon fence object
6373  *
6374  * Copy GPU pages using the DMA engine (SI).
6375  * Used by the radeon ttm implementation to move pages if
6376  * registered as the asic copy callback.
6377  */
6378 int si_copy_dma(struct radeon_device *rdev,
6379 		uint64_t src_offset, uint64_t dst_offset,
6380 		unsigned num_gpu_pages,
6381 		struct radeon_fence **fence)
6382 {
6383 	struct radeon_semaphore *sem = NULL;
6384 	int ring_index = rdev->asic->copy.dma_ring_index;
6385 	struct radeon_ring *ring = &rdev->ring[ring_index];
6386 	u32 size_in_bytes, cur_size_in_bytes;
6387 	int i, num_loops;
6388 	int r = 0;
6389 
6390 	r = radeon_semaphore_create(rdev, &sem);
6391 	if (r) {
6392 		DRM_ERROR("radeon: moving bo (%d).\n", r);
6393 		return r;
6394 	}
6395 
6396 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
6397 	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
6398 	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
6399 	if (r) {
6400 		DRM_ERROR("radeon: moving bo (%d).\n", r);
6401 		radeon_semaphore_free(rdev, &sem, NULL);
6402 		return r;
6403 	}
6404 
6405 	if (radeon_fence_need_sync(*fence, ring->idx)) {
6406 		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
6407 					    ring->idx);
6408 		radeon_fence_note_sync(*fence, ring->idx);
6409 	} else {
6410 		radeon_semaphore_free(rdev, &sem, NULL);
6411 	}
6412 
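	/* each COPY packet moves up to 0xFFFFF bytes and takes five dwords
	 * on the ring (header, dst/src low addresses, dst/src high bits),
	 * which is what the num_loops * 5 term in the ring_lock above
	 * accounts for; the remaining 11 dwords are headroom for the
	 * semaphore sync and fence packets
	 */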
6413 	for (i = 0; i < num_loops; i++) {
6414 		cur_size_in_bytes = size_in_bytes;
6415 		if (cur_size_in_bytes > 0xFFFFF)
6416 			cur_size_in_bytes = 0xFFFFF;
6417 		size_in_bytes -= cur_size_in_bytes;
6418 		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
6419 		radeon_ring_write(ring, dst_offset & 0xffffffff);
6420 		radeon_ring_write(ring, src_offset & 0xffffffff);
6421 		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
6422 		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
6423 		src_offset += cur_size_in_bytes;
6424 		dst_offset += cur_size_in_bytes;
6425 	}
6426 
6427 	r = radeon_fence_emit(rdev, fence, ring->idx);
6428 	if (r) {
6429 		radeon_ring_unlock_undo(rdev, ring);
6430 		return r;
6431 	}
6432 
6433 	radeon_ring_unlock_commit(rdev, ring);
6434 	radeon_semaphore_free(rdev, &sem, *fence);
6435 
6436 	return r;
6437 }
6438 
6439 /*
6440  * startup/shutdown callbacks
6441  */
6442 static int si_startup(struct radeon_device *rdev)
6443 {
6444 	struct radeon_ring *ring;
6445 	int r;
6446 
6447 	/* enable pcie gen2/3 link */
6448 	si_pcie_gen3_enable(rdev);
6449 	/* enable aspm */
6450 	si_program_aspm(rdev);
6451 
6452 	si_mc_program(rdev);
6453 
6454 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6455 	    !rdev->rlc_fw || !rdev->mc_fw) {
6456 		r = si_init_microcode(rdev);
6457 		if (r) {
6458 			DRM_ERROR("Failed to load firmware!\n");
6459 			return r;
6460 		}
6461 	}
6462 
6463 	r = si_mc_load_microcode(rdev);
6464 	if (r) {
6465 		DRM_ERROR("Failed to load MC firmware!\n");
6466 		return r;
6467 	}
6468 
6469 	r = r600_vram_scratch_init(rdev);
6470 	if (r)
6471 		return r;
6472 
6473 	r = si_pcie_gart_enable(rdev);
6474 	if (r)
6475 		return r;
6476 	si_gpu_init(rdev);
6477 
6478 	/* allocate rlc buffers */
6479 	r = si_rlc_init(rdev);
6480 	if (r) {
6481 		DRM_ERROR("Failed to init rlc BOs!\n");
6482 		return r;
6483 	}
6484 
6485 	/* allocate wb buffer */
6486 	r = radeon_wb_init(rdev);
6487 	if (r)
6488 		return r;
6489 
6490 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6491 	if (r) {
6492 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6493 		return r;
6494 	}
6495 
6496 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6497 	if (r) {
6498 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6499 		return r;
6500 	}
6501 
6502 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6503 	if (r) {
6504 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6505 		return r;
6506 	}
6507 
6508 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6509 	if (r) {
6510 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6511 		return r;
6512 	}
6513 
6514 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6515 	if (r) {
6516 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6517 		return r;
6518 	}
6519 
6520 	if (rdev->has_uvd) {
6521 		r = rv770_uvd_resume(rdev);
6522 		if (!r) {
6523 			r = radeon_fence_driver_start_ring(rdev,
6524 							   R600_RING_TYPE_UVD_INDEX);
6525 			if (r)
6526 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6527 		}
6528 		if (r)
6529 			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6530 	}
6531 
6532 	/* Enable IRQ */
6533 	if (!rdev->irq.installed) {
6534 		r = radeon_irq_kms_init(rdev);
6535 		if (r)
6536 			return r;
6537 	}
6538 
6539 	r = si_irq_init(rdev);
6540 	if (r) {
6541 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
6542 		radeon_irq_kms_fini(rdev);
6543 		return r;
6544 	}
6545 	si_irq_set(rdev);
6546 
6547 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6548 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6549 			     CP_RB0_RPTR, CP_RB0_WPTR,
6550 			     0, 0xfffff, RADEON_CP_PACKET2);
6551 	if (r)
6552 		return r;
6553 
6554 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6555 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6556 			     CP_RB1_RPTR, CP_RB1_WPTR,
6557 			     0, 0xfffff, RADEON_CP_PACKET2);
6558 	if (r)
6559 		return r;
6560 
6561 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6562 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6563 			     CP_RB2_RPTR, CP_RB2_WPTR,
6564 			     0, 0xfffff, RADEON_CP_PACKET2);
6565 	if (r)
6566 		return r;
6567 
6568 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6569 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6570 			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
6571 			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
6572 			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6573 	if (r)
6574 		return r;
6575 
6576 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6577 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6578 			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
6579 			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
6580 			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6581 	if (r)
6582 		return r;
6583 
6584 	r = si_cp_load_microcode(rdev);
6585 	if (r)
6586 		return r;
6587 	r = si_cp_resume(rdev);
6588 	if (r)
6589 		return r;
6590 
6591 	r = cayman_dma_resume(rdev);
6592 	if (r)
6593 		return r;
6594 
6595 	if (rdev->has_uvd) {
6596 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6597 		if (ring->ring_size) {
6598 			r = radeon_ring_init(rdev, ring, ring->ring_size,
6599 					     R600_WB_UVD_RPTR_OFFSET,
6600 					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6601 					     0, 0xfffff, RADEON_CP_PACKET2);
6602 			if (!r)
6603 				r = r600_uvd_init(rdev);
6604 			if (r)
6605 				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6606 		}
6607 	}
6608 
6609 	r = radeon_ib_pool_init(rdev);
6610 	if (r) {
6611 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6612 		return r;
6613 	}
6614 
6615 	r = radeon_vm_manager_init(rdev);
6616 	if (r) {
6617 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6618 		return r;
6619 	}
6620 
6621 	return 0;
6622 }
6623 
6624 int si_resume(struct radeon_device *rdev)
6625 {
6626 	int r;
6627 
6628 	/* Do not reset the GPU before posting: on rv770-class hardware,
6629 	 * unlike on r500, posting performs the tasks needed to bring the
6630 	 * GPU back into good shape.
6631 	 */
6632 	/* post card */
6633 	atom_asic_init(rdev->mode_info.atom_context);
6634 
6635 	/* init golden registers */
6636 	si_init_golden_registers(rdev);
6637 
6638 	rdev->accel_working = true;
6639 	r = si_startup(rdev);
6640 	if (r) {
6641 		DRM_ERROR("si startup failed on resume\n");
6642 		rdev->accel_working = false;
6643 		return r;
6644 	}
6645 
6646 	return r;
6647 
6648 }
6649 
6650 int si_suspend(struct radeon_device *rdev)
6651 {
6652 	radeon_vm_manager_fini(rdev);
6653 	si_cp_enable(rdev, false);
6654 	cayman_dma_stop(rdev);
6655 	if (rdev->has_uvd) {
6656 		r600_uvd_stop(rdev);
6657 		radeon_uvd_suspend(rdev);
6658 	}
6659 	si_irq_suspend(rdev);
6660 	radeon_wb_disable(rdev);
6661 	si_pcie_gart_disable(rdev);
6662 	return 0;
6663 }
6664 
6665 /* The plan is to move initialization into this function and use
6666  * helper functions so that radeon_device_init does pretty much
6667  * nothing more than call asic-specific functions. This should
6668  * also allow us to remove a bunch of callback functions,
6669  * such as vram_info.
6670  */
6671 int si_init(struct radeon_device *rdev)
6672 {
6673 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6674 	int r;
6675 
6676 	/* Read BIOS */
6677 	if (!radeon_get_bios(rdev)) {
6678 		if (ASIC_IS_AVIVO(rdev))
6679 			return -EINVAL;
6680 	}
6681 	/* Must be an ATOMBIOS */
6682 	if (!rdev->is_atom_bios) {
6683 		dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
6684 		return -EINVAL;
6685 	}
6686 	r = radeon_atombios_init(rdev);
6687 	if (r)
6688 		return r;
6689 
6690 	/* Post card if necessary */
6691 	if (!radeon_card_posted(rdev)) {
6692 		if (!rdev->bios) {
6693 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6694 			return -EINVAL;
6695 		}
6696 		DRM_INFO("GPU not posted. posting now...\n");
6697 		atom_asic_init(rdev->mode_info.atom_context);
6698 	}
6699 	/* init golden registers */
6700 	si_init_golden_registers(rdev);
6701 	/* Initialize scratch registers */
6702 	si_scratch_init(rdev);
6703 	/* Initialize surface registers */
6704 	radeon_surface_init(rdev);
6705 	/* Initialize clocks */
6706 	radeon_get_clock_info(rdev->ddev);
6707 
6708 	/* Fence driver */
6709 	r = radeon_fence_driver_init(rdev);
6710 	if (r)
6711 		return r;
6712 
6713 	/* initialize memory controller */
6714 	r = si_mc_init(rdev);
6715 	if (r)
6716 		return r;
6717 	/* Memory manager */
6718 	r = radeon_bo_init(rdev);
6719 	if (r)
6720 		return r;
6721 
6722 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6723 	ring->ring_obj = NULL;
6724 	r600_ring_init(rdev, ring, 1024 * 1024);
6725 
6726 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6727 	ring->ring_obj = NULL;
6728 	r600_ring_init(rdev, ring, 1024 * 1024);
6729 
6730 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6731 	ring->ring_obj = NULL;
6732 	r600_ring_init(rdev, ring, 1024 * 1024);
6733 
6734 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6735 	ring->ring_obj = NULL;
6736 	r600_ring_init(rdev, ring, 64 * 1024);
6737 
6738 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6739 	ring->ring_obj = NULL;
6740 	r600_ring_init(rdev, ring, 64 * 1024);
6741 
6742 	if (rdev->has_uvd) {
6743 		r = radeon_uvd_init(rdev);
6744 		if (!r) {
6745 			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6746 			ring->ring_obj = NULL;
6747 			r600_ring_init(rdev, ring, 4096);
6748 		}
6749 	}
6750 
6751 	rdev->ih.ring_obj = NULL;
6752 	r600_ih_ring_init(rdev, 64 * 1024);
6753 
6754 	r = r600_pcie_gart_init(rdev);
6755 	if (r)
6756 		return r;
6757 
6758 	rdev->accel_working = true;
6759 	r = si_startup(rdev);
6760 	if (r) {
6761 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6762 		si_cp_fini(rdev);
6763 		cayman_dma_fini(rdev);
6764 		si_irq_fini(rdev);
6765 		si_rlc_fini(rdev);
6766 		radeon_wb_fini(rdev);
6767 		radeon_ib_pool_fini(rdev);
6768 		radeon_vm_manager_fini(rdev);
6769 		radeon_irq_kms_fini(rdev);
6770 		si_pcie_gart_fini(rdev);
6771 		rdev->accel_working = false;
6772 	}
6773 
6774 	/* Don't start up if the MC ucode is missing.
6775 	 * The default clocks and voltages before the MC ucode
6776 	 * is loaded are not sufficient for advanced operations.
6777 	 */
6778 	if (!rdev->mc_fw) {
6779 		DRM_ERROR("radeon: MC ucode required for SI+.\n");
6780 		return -EINVAL;
6781 	}
6782 
6783 	return 0;
6784 }
6785 
6786 void si_fini(struct radeon_device *rdev)
6787 {
6788 	si_cp_fini(rdev);
6789 	cayman_dma_fini(rdev);
6790 	si_irq_fini(rdev);
6791 	si_rlc_fini(rdev);
6792 	si_fini_cg(rdev);
6793 	si_fini_pg(rdev);
6794 	radeon_wb_fini(rdev);
6795 	radeon_vm_manager_fini(rdev);
6796 	radeon_ib_pool_fini(rdev);
6797 	radeon_irq_kms_fini(rdev);
6798 	if (rdev->has_uvd) {
6799 		r600_uvd_stop(rdev);
6800 		radeon_uvd_fini(rdev);
6801 	}
6802 	si_pcie_gart_fini(rdev);
6803 	r600_vram_scratch_fini(rdev);
6804 	radeon_gem_fini(rdev);
6805 	radeon_fence_driver_fini(rdev);
6806 	radeon_bo_fini(rdev);
6807 	radeon_atombios_fini(rdev);
6808 	si_fini_microcode(rdev);
6809 	kfree(rdev->bios);
6810 	rdev->bios = NULL;
6811 }
6812 
6813 /**
6814  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6815  *
6816  * @rdev: radeon_device pointer
6817  *
6818  * Fetches a GPU clock counter snapshot (SI).
6819  * Returns the 64 bit clock counter snapshot.
6820  */
6821 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6822 {
6823 	uint64_t clock;
6824 
6825 	spin_lock(&rdev->gpu_clock_mutex);
6826 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6827 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6828 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6829 	spin_unlock(&rdev->gpu_clock_mutex);
6830 	return clock;
6831 }
6832 
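/**
 * si_set_uvd_clocks - set the UVD VCLK and DCLK (SI)
 *
 * @rdev: radeon_device pointer
 * @vclk: target video clock (10 kHz units); 0 leaves the UPLL bypassed
 *        and puts it to sleep
 * @dclk: target decoder clock (10 kHz units); 0 behaves like @vclk
 *
 * Switches VCLK/DCLK onto the bypass clock, recomputes the UPLL
 * dividers, walks the PLL through reset and lock, then switches the
 * clocks back onto the PLL output.
 * Returns 0 on success, error on failure.
 */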
6833 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6834 {
6835 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
6836 	int r;
6837 
6838 	/* bypass vclk and dclk with bclk */
6839 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
6840 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
6841 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6842 
6843 	/* put PLL in bypass mode */
6844 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
6845 
6846 	if (!vclk || !dclk) {
6847 		/* keep the bypass mode and put the PLL to sleep */
6848 		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6849 		return 0;
6850 	}
6851 
6852 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
6853 					  16384, 0x03FFFFFF, 0, 128, 5,
6854 					  &fb_div, &vclk_div, &dclk_div);
6855 	if (r)
6856 		return r;
6857 
6858 	/* set RESET_ANTI_MUX to 0 */
6859 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
6860 
6861 	/* set VCO_MODE to 1 */
6862 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
6863 
6864 	/* toggle UPLL_SLEEP to 1 then back to 0 */
6865 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
6866 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
6867 
6868 	/* deassert UPLL_RESET */
6869 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6870 
6871 	mdelay(1);
6872 
6873 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6874 	if (r)
6875 		return r;
6876 
6877 	/* assert UPLL_RESET again */
6878 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
6879 
6880 	/* disable spread spectrum. */
6881 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
6882 
6883 	/* set feedback divider */
6884 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
6885 
6886 	/* set ref divider to 0 */
6887 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
6888 
6889 	if (fb_div < 307200)
6890 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
6891 	else
6892 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
6893 
6894 	/* set PDIV_A and PDIV_B */
6895 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
6896 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
6897 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
6898 
6899 	/* give the PLL some time to settle */
6900 	mdelay(15);
6901 
6902 	/* deassert PLL_RESET */
6903 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
6904 
6905 	mdelay(15);
6906 
6907 	/* switch from bypass mode to normal mode */
6908 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
6909 
6910 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
6911 	if (r)
6912 		return r;
6913 
6914 	/* switch VCLK and DCLK selection */
6915 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
6916 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
6917 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6918 
6919 	mdelay(100);
6920 
6921 	return 0;
6922 }
6923 
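/* DragonFly-specific shim: wrap a newbus device_t in a minimal struct
 * pci_dev so the Linux-style pci_{read,write}_config_word() helpers
 * can be used below.  Only the .dev member is meaningful; the other
 * fields are left zeroed.
 */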
6924 static struct pci_dev dev_to_pcidev(device_t dev)
6925 {
6926 	struct pci_dev pdev = { .dev = dev };
6927 
6928 	return pdev;
6929 }
6930 
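/* Bring the PCIe link up to gen2/gen3 speeds when both the GPU and
 * the root port support them.  Disabled when the radeon.pcie_gen2
 * module parameter is set to 0.
 */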
6931 static void si_pcie_gen3_enable(struct radeon_device *rdev)
6932 {
6933 #if 0
6934 	struct pci_dev *root = rdev->dev->bus->self;
6935 #else
6936 	device_t root = device_get_parent(rdev->dev);
6937 #endif
6938 	int bridge_pos, gpu_pos;
6939 	u32 speed_cntl, mask, current_data_rate;
6940 	int ret, i;
6941 	u16 tmp16;
6942 	struct pci_dev root_pdev = dev_to_pcidev(root);
6943 	struct pci_dev pdev = dev_to_pcidev(rdev->dev);
6944 
6945 	if (radeon_pcie_gen2 == 0)
6946 		return;
6947 
6948 	if (rdev->flags & RADEON_IS_IGP)
6949 		return;
6950 
6951 	if (!(rdev->flags & RADEON_IS_PCIE))
6952 		return;
6953 
6954 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
6955 	if (ret != 0)
6956 		return;
6957 
6958 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
6959 		return;
6960 
6961 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
6962 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
6963 		LC_CURRENT_DATA_RATE_SHIFT;
6964 	if (mask & DRM_PCIE_SPEED_80) {
6965 		if (current_data_rate == 2) {
6966 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
6967 			return;
6968 		}
6969 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
6970 	} else if (mask & DRM_PCIE_SPEED_50) {
6971 		if (current_data_rate == 1) {
6972 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
6973 			return;
6974 		}
6975 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
6976 	}
6977 
6978 	bridge_pos = pci_get_pciecap_ptr(root);
6979 	if (!bridge_pos)
6980 		return;
6981 
6982 	gpu_pos = pci_get_pciecap_ptr(rdev->dev);
6983 	if (!gpu_pos)
6984 		return;
6985 
6986 	if (mask & DRM_PCIE_SPEED_80) {
6987 		/* re-try equalization if gen3 is not already enabled */
6988 		if (current_data_rate != 2) {
6989 			u16 bridge_cfg, gpu_cfg;
6990 			u16 bridge_cfg2, gpu_cfg2;
6991 			u32 max_lw, current_lw, tmp;
6992 
6993 			pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
6994 			pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
6995 
6996 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
6997 			pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, tmp16);
6998 
6999 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7000 			pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7001 
7002 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7003 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7004 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7005 
7006 			if (current_lw < max_lw) {
7007 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7008 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7009 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7010 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7011 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7012 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7013 				}
7014 			}
7015 
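			/* Retrain the link up to ten times: snapshot the
			 * bridge and GPU link-control registers, quiesce the
			 * link controller, redo equalization, then restore
			 * the saved HAWD and compliance/margin bits.  The
			 * retraining pass is skipped if the GPU reports
			 * pending transactions.
			 */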
7016 			for (i = 0; i < 10; i++) {
7017 				/* check status */
7018 				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7019 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7020 					break;
7021 
7022 				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7023 				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7024 
7025 				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7026 				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7027 
7028 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7029 				tmp |= LC_SET_QUIESCE;
7030 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7031 
7032 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7033 				tmp |= LC_REDO_EQ;
7034 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7035 
7036 				mdelay(100);
7037 
7038 				/* linkctl: clear HAWD, then restore the saved setting */
7039 				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7040 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7041 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7042 				pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7043 
7044 				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7045 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7046 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7047 				pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7048 
7049 				/* linkctl2: restore the saved compliance/margin bits */
7050 				pci_read_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7051 				tmp16 &= ~((1 << 4) | (7 << 9));
7052 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7053 				pci_write_config_word(&root_pdev, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7054 
7055 				pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7056 				tmp16 &= ~((1 << 4) | (7 << 9));
7057 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7058 				pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7059 
7060 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7061 				tmp &= ~LC_SET_QUIESCE;
7062 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7063 			}
7064 		}
7065 	}
7066 
7067 	/* set the link speed */
7068 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7069 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7070 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7071 
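	/* program the target link speed into the low four bits of the
	 * GPU's Link Control 2 register (LNKCTL2[3:0])
	 */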
7072 	pci_read_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7073 	tmp16 &= ~0xf;
7074 	if (mask & DRM_PCIE_SPEED_80)
7075 		tmp16 |= 3; /* gen3 */
7076 	else if (mask & DRM_PCIE_SPEED_50)
7077 		tmp16 |= 2; /* gen2 */
7078 	else
7079 		tmp16 |= 1; /* gen1 */
7080 	pci_write_config_word(&pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7081 
7082 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7083 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7084 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7085 
7086 	for (i = 0; i < rdev->usec_timeout; i++) {
7087 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7088 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7089 			break;
7090 		udelay(1);
7091 	}
7092 }
7093 
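/* Program ASPM (active state power management): tune the N_FTS count,
 * enable the L0s/L1 inactivity timers, power down the PCIe PHY PLLs
 * while in L1 and, where CLKREQ# can be used, move auxiliary clocks
 * over to the bus reference clock.  Disabled when the radeon.aspm
 * module parameter is set to 0.
 */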
7094 static void si_program_aspm(struct radeon_device *rdev)
7095 {
7096 	u32 data, orig;
7097 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7098 	bool disable_clkreq = false;
7099 
7100 	if (radeon_aspm == 0)
7101 		return;
7102 
7103 	if (!(rdev->flags & RADEON_IS_PCIE))
7104 		return;
7105 
7106 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7107 	data &= ~LC_XMIT_N_FTS_MASK;
7108 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7109 	if (orig != data)
7110 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7111 
7112 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7113 	data |= LC_GO_TO_RECOVERY;
7114 	if (orig != data)
7115 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7116 
7117 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7118 	data |= P_IGNORE_EDB_ERR;
7119 	if (orig != data)
7120 		WREG32_PCIE(PCIE_P_CNTL, data);
7121 
7122 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7123 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7124 	data |= LC_PMI_TO_L1_DIS;
7125 	if (!disable_l0s)
7126 		data |= LC_L0S_INACTIVITY(7);
7127 
7128 	if (!disable_l1) {
7129 		data |= LC_L1_INACTIVITY(7);
7130 		data &= ~LC_PMI_TO_L1_DIS;
7131 		if (orig != data)
7132 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7133 
7134 		if (!disable_plloff_in_l1) {
7135 			bool clk_req_support;
7136 
7137 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7138 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7139 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7140 			if (orig != data)
7141 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7142 
7143 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7144 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7145 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7146 			if (orig != data)
7147 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7148 
7149 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7150 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7151 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7152 			if (orig != data)
7153 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7154 
7155 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7156 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7157 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7158 			if (orig != data)
7159 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7160 
7161 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7162 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7163 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7164 				if (orig != data)
7165 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7166 
7167 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7168 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7169 				if (orig != data)
7170 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7171 
7172 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7173 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7174 				if (orig != data)
7175 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7176 
7177 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7178 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7179 				if (orig != data)
7180 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7181 
7182 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7183 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7184 				if (orig != data)
7185 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7186 
7187 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7188 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7189 				if (orig != data)
7190 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7191 
7192 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7193 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7194 				if (orig != data)
7195 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7196 
7197 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7198 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7199 				if (orig != data)
7200 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7201 			}
7202 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7203 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7204 			data |= LC_DYN_LANES_PWR_STATE(3);
7205 			if (orig != data)
7206 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7207 
7208 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7209 			data &= ~LS2_EXIT_TIME_MASK;
7210 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7211 				data |= LS2_EXIT_TIME(5);
7212 			if (orig != data)
7213 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7214 
7215 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7216 			data &= ~LS2_EXIT_TIME_MASK;
7217 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7218 				data |= LS2_EXIT_TIME(5);
7219 			if (orig != data)
7220 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7221 
7222 			if (!disable_clkreq) {
7223 #ifdef MN_TODO
7224 				struct pci_dev *root = rdev->pdev->bus->self;
7225 				u32 lnkcap;
7226 
7227 				clk_req_support = false;
7228 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7229 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7230 					clk_req_support = true;
7231 #else
7232 				clk_req_support = false;
7233 #endif
7234 			} else {
7235 				clk_req_support = false;
7236 			}
7237 
7238 			if (clk_req_support) {
7239 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7240 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7241 				if (orig != data)
7242 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7243 
7244 				orig = data = RREG32(THM_CLK_CNTL);
7245 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7246 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7247 				if (orig != data)
7248 					WREG32(THM_CLK_CNTL, data);
7249 
7250 				orig = data = RREG32(MISC_CLK_CNTL);
7251 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7252 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7253 				if (orig != data)
7254 					WREG32(MISC_CLK_CNTL, data);
7255 
7256 				orig = data = RREG32(CG_CLKPIN_CNTL);
7257 				data &= ~BCLK_AS_XCLK;
7258 				if (orig != data)
7259 					WREG32(CG_CLKPIN_CNTL, data);
7260 
7261 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7262 				data &= ~FORCE_BIF_REFCLK_EN;
7263 				if (orig != data)
7264 					WREG32(CG_CLKPIN_CNTL_2, data);
7265 
7266 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7267 				data &= ~MPLL_CLKOUT_SEL_MASK;
7268 				data |= MPLL_CLKOUT_SEL(4);
7269 				if (orig != data)
7270 					WREG32(MPLL_BYPASSCLK_SEL, data);
7271 
7272 				orig = data = RREG32(SPLL_CNTL_MODE);
7273 				data &= ~SPLL_REFCLK_SEL_MASK;
7274 				if (orig != data)
7275 					WREG32(SPLL_CNTL_MODE, data);
7276 			}
7277 		}
7278 	} else {
7279 		if (orig != data)
7280 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7281 	}
7282 
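	/* enable light sleep for the BIF slave, master and replay memories */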
7283 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7284 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7285 	if (orig != data)
7286 		WREG32_PCIE(PCIE_CNTL2, data);
7287 
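	/* If the transmitter N_FTS field is already saturated and lane
	 * reversal is active in both directions, clear the L0s
	 * inactivity timeout that was set above.
	 */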
7288 	if (!disable_l0s) {
7289 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7290 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7291 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7292 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7293 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7294 				data &= ~LC_L0S_INACTIVITY_MASK;
7295 				if (orig != data)
7296 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7297 			}
7298 		}
7299 	}
7300 }
7301