xref: /linux/drivers/gpu/drm/radeon/si.c (revision 44f57d78)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 #include "clearstate_si.h"
36 #include "radeon_ucode.h"
37 
38 
39 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
46 
47 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
48 MODULE_FIRMWARE("radeon/tahiti_me.bin");
49 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
50 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
51 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
52 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
53 
54 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
55 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
56 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
57 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
58 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
59 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
60 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
61 
62 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
63 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
64 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
65 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
66 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
67 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
68 MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
69 
70 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
71 MODULE_FIRMWARE("radeon/VERDE_me.bin");
72 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
73 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
74 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
75 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
76 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
77 
78 MODULE_FIRMWARE("radeon/verde_pfp.bin");
79 MODULE_FIRMWARE("radeon/verde_me.bin");
80 MODULE_FIRMWARE("radeon/verde_ce.bin");
81 MODULE_FIRMWARE("radeon/verde_mc.bin");
82 MODULE_FIRMWARE("radeon/verde_rlc.bin");
83 MODULE_FIRMWARE("radeon/verde_smc.bin");
84 MODULE_FIRMWARE("radeon/verde_k_smc.bin");
85 
86 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
87 MODULE_FIRMWARE("radeon/OLAND_me.bin");
88 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
89 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
90 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
91 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
92 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
93 
94 MODULE_FIRMWARE("radeon/oland_pfp.bin");
95 MODULE_FIRMWARE("radeon/oland_me.bin");
96 MODULE_FIRMWARE("radeon/oland_ce.bin");
97 MODULE_FIRMWARE("radeon/oland_mc.bin");
98 MODULE_FIRMWARE("radeon/oland_rlc.bin");
99 MODULE_FIRMWARE("radeon/oland_smc.bin");
100 MODULE_FIRMWARE("radeon/oland_k_smc.bin");
101 
102 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
103 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
104 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
105 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
106 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
107 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
108 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
109 
110 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
111 MODULE_FIRMWARE("radeon/hainan_me.bin");
112 MODULE_FIRMWARE("radeon/hainan_ce.bin");
113 MODULE_FIRMWARE("radeon/hainan_mc.bin");
114 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
115 MODULE_FIRMWARE("radeon/hainan_smc.bin");
116 MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
117 MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");
118 
119 MODULE_FIRMWARE("radeon/si58_mc.bin");
120 
121 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
122 static void si_pcie_gen3_enable(struct radeon_device *rdev);
123 static void si_program_aspm(struct radeon_device *rdev);
124 extern void sumo_rlc_fini(struct radeon_device *rdev);
125 extern int sumo_rlc_init(struct radeon_device *rdev);
126 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
127 extern void r600_ih_ring_fini(struct radeon_device *rdev);
128 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
129 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
130 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
131 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
132 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
133 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
134 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
135 					 bool enable);
136 static void si_init_pg(struct radeon_device *rdev);
137 static void si_init_cg(struct radeon_device *rdev);
138 static void si_fini_pg(struct radeon_device *rdev);
139 static void si_fini_cg(struct radeon_device *rdev);
140 static void si_rlc_stop(struct radeon_device *rdev);
141 
142 static const u32 crtc_offsets[] =
143 {
144 	EVERGREEN_CRTC0_REGISTER_OFFSET,
145 	EVERGREEN_CRTC1_REGISTER_OFFSET,
146 	EVERGREEN_CRTC2_REGISTER_OFFSET,
147 	EVERGREEN_CRTC3_REGISTER_OFFSET,
148 	EVERGREEN_CRTC4_REGISTER_OFFSET,
149 	EVERGREEN_CRTC5_REGISTER_OFFSET
150 };
151 
152 static const u32 si_disp_int_status[] =
153 {
154 	DISP_INTERRUPT_STATUS,
155 	DISP_INTERRUPT_STATUS_CONTINUE,
156 	DISP_INTERRUPT_STATUS_CONTINUE2,
157 	DISP_INTERRUPT_STATUS_CONTINUE3,
158 	DISP_INTERRUPT_STATUS_CONTINUE4,
159 	DISP_INTERRUPT_STATUS_CONTINUE5
160 };
161 
/*
 * Address of a hot-plug-detect register for pin index x, computed from the
 * HPD1 register plus a stride of 0xc bytes per pin (x == 0 yields the HPD1
 * register itself).
 *
 * The argument is parenthesized so that expressions such as
 * DC_HPDx_CONTROL(i + 1) scale the whole index instead of only its last term.
 */
#define DC_HPDx_CONTROL(x)        (DC_HPD1_CONTROL     + ((x) * 0xc))
#define DC_HPDx_INT_CONTROL(x)    (DC_HPD1_INT_CONTROL + ((x) * 0xc))
#define DC_HPDx_INT_STATUS_REG(x) (DC_HPD1_INT_STATUS  + ((x) * 0xc))
165 
166 static const u32 verde_rlc_save_restore_register_list[] =
167 {
168 	(0x8000 << 16) | (0x98f4 >> 2),
169 	0x00000000,
170 	(0x8040 << 16) | (0x98f4 >> 2),
171 	0x00000000,
172 	(0x8000 << 16) | (0xe80 >> 2),
173 	0x00000000,
174 	(0x8040 << 16) | (0xe80 >> 2),
175 	0x00000000,
176 	(0x8000 << 16) | (0x89bc >> 2),
177 	0x00000000,
178 	(0x8040 << 16) | (0x89bc >> 2),
179 	0x00000000,
180 	(0x8000 << 16) | (0x8c1c >> 2),
181 	0x00000000,
182 	(0x8040 << 16) | (0x8c1c >> 2),
183 	0x00000000,
184 	(0x9c00 << 16) | (0x98f0 >> 2),
185 	0x00000000,
186 	(0x9c00 << 16) | (0xe7c >> 2),
187 	0x00000000,
188 	(0x8000 << 16) | (0x9148 >> 2),
189 	0x00000000,
190 	(0x8040 << 16) | (0x9148 >> 2),
191 	0x00000000,
192 	(0x9c00 << 16) | (0x9150 >> 2),
193 	0x00000000,
194 	(0x9c00 << 16) | (0x897c >> 2),
195 	0x00000000,
196 	(0x9c00 << 16) | (0x8d8c >> 2),
197 	0x00000000,
198 	(0x9c00 << 16) | (0xac54 >> 2),
199 	0X00000000,
200 	0x3,
201 	(0x9c00 << 16) | (0x98f8 >> 2),
202 	0x00000000,
203 	(0x9c00 << 16) | (0x9910 >> 2),
204 	0x00000000,
205 	(0x9c00 << 16) | (0x9914 >> 2),
206 	0x00000000,
207 	(0x9c00 << 16) | (0x9918 >> 2),
208 	0x00000000,
209 	(0x9c00 << 16) | (0x991c >> 2),
210 	0x00000000,
211 	(0x9c00 << 16) | (0x9920 >> 2),
212 	0x00000000,
213 	(0x9c00 << 16) | (0x9924 >> 2),
214 	0x00000000,
215 	(0x9c00 << 16) | (0x9928 >> 2),
216 	0x00000000,
217 	(0x9c00 << 16) | (0x992c >> 2),
218 	0x00000000,
219 	(0x9c00 << 16) | (0x9930 >> 2),
220 	0x00000000,
221 	(0x9c00 << 16) | (0x9934 >> 2),
222 	0x00000000,
223 	(0x9c00 << 16) | (0x9938 >> 2),
224 	0x00000000,
225 	(0x9c00 << 16) | (0x993c >> 2),
226 	0x00000000,
227 	(0x9c00 << 16) | (0x9940 >> 2),
228 	0x00000000,
229 	(0x9c00 << 16) | (0x9944 >> 2),
230 	0x00000000,
231 	(0x9c00 << 16) | (0x9948 >> 2),
232 	0x00000000,
233 	(0x9c00 << 16) | (0x994c >> 2),
234 	0x00000000,
235 	(0x9c00 << 16) | (0x9950 >> 2),
236 	0x00000000,
237 	(0x9c00 << 16) | (0x9954 >> 2),
238 	0x00000000,
239 	(0x9c00 << 16) | (0x9958 >> 2),
240 	0x00000000,
241 	(0x9c00 << 16) | (0x995c >> 2),
242 	0x00000000,
243 	(0x9c00 << 16) | (0x9960 >> 2),
244 	0x00000000,
245 	(0x9c00 << 16) | (0x9964 >> 2),
246 	0x00000000,
247 	(0x9c00 << 16) | (0x9968 >> 2),
248 	0x00000000,
249 	(0x9c00 << 16) | (0x996c >> 2),
250 	0x00000000,
251 	(0x9c00 << 16) | (0x9970 >> 2),
252 	0x00000000,
253 	(0x9c00 << 16) | (0x9974 >> 2),
254 	0x00000000,
255 	(0x9c00 << 16) | (0x9978 >> 2),
256 	0x00000000,
257 	(0x9c00 << 16) | (0x997c >> 2),
258 	0x00000000,
259 	(0x9c00 << 16) | (0x9980 >> 2),
260 	0x00000000,
261 	(0x9c00 << 16) | (0x9984 >> 2),
262 	0x00000000,
263 	(0x9c00 << 16) | (0x9988 >> 2),
264 	0x00000000,
265 	(0x9c00 << 16) | (0x998c >> 2),
266 	0x00000000,
267 	(0x9c00 << 16) | (0x8c00 >> 2),
268 	0x00000000,
269 	(0x9c00 << 16) | (0x8c14 >> 2),
270 	0x00000000,
271 	(0x9c00 << 16) | (0x8c04 >> 2),
272 	0x00000000,
273 	(0x9c00 << 16) | (0x8c08 >> 2),
274 	0x00000000,
275 	(0x8000 << 16) | (0x9b7c >> 2),
276 	0x00000000,
277 	(0x8040 << 16) | (0x9b7c >> 2),
278 	0x00000000,
279 	(0x8000 << 16) | (0xe84 >> 2),
280 	0x00000000,
281 	(0x8040 << 16) | (0xe84 >> 2),
282 	0x00000000,
283 	(0x8000 << 16) | (0x89c0 >> 2),
284 	0x00000000,
285 	(0x8040 << 16) | (0x89c0 >> 2),
286 	0x00000000,
287 	(0x8000 << 16) | (0x914c >> 2),
288 	0x00000000,
289 	(0x8040 << 16) | (0x914c >> 2),
290 	0x00000000,
291 	(0x8000 << 16) | (0x8c20 >> 2),
292 	0x00000000,
293 	(0x8040 << 16) | (0x8c20 >> 2),
294 	0x00000000,
295 	(0x8000 << 16) | (0x9354 >> 2),
296 	0x00000000,
297 	(0x8040 << 16) | (0x9354 >> 2),
298 	0x00000000,
299 	(0x9c00 << 16) | (0x9060 >> 2),
300 	0x00000000,
301 	(0x9c00 << 16) | (0x9364 >> 2),
302 	0x00000000,
303 	(0x9c00 << 16) | (0x9100 >> 2),
304 	0x00000000,
305 	(0x9c00 << 16) | (0x913c >> 2),
306 	0x00000000,
307 	(0x8000 << 16) | (0x90e0 >> 2),
308 	0x00000000,
309 	(0x8000 << 16) | (0x90e4 >> 2),
310 	0x00000000,
311 	(0x8000 << 16) | (0x90e8 >> 2),
312 	0x00000000,
313 	(0x8040 << 16) | (0x90e0 >> 2),
314 	0x00000000,
315 	(0x8040 << 16) | (0x90e4 >> 2),
316 	0x00000000,
317 	(0x8040 << 16) | (0x90e8 >> 2),
318 	0x00000000,
319 	(0x9c00 << 16) | (0x8bcc >> 2),
320 	0x00000000,
321 	(0x9c00 << 16) | (0x8b24 >> 2),
322 	0x00000000,
323 	(0x9c00 << 16) | (0x88c4 >> 2),
324 	0x00000000,
325 	(0x9c00 << 16) | (0x8e50 >> 2),
326 	0x00000000,
327 	(0x9c00 << 16) | (0x8c0c >> 2),
328 	0x00000000,
329 	(0x9c00 << 16) | (0x8e58 >> 2),
330 	0x00000000,
331 	(0x9c00 << 16) | (0x8e5c >> 2),
332 	0x00000000,
333 	(0x9c00 << 16) | (0x9508 >> 2),
334 	0x00000000,
335 	(0x9c00 << 16) | (0x950c >> 2),
336 	0x00000000,
337 	(0x9c00 << 16) | (0x9494 >> 2),
338 	0x00000000,
339 	(0x9c00 << 16) | (0xac0c >> 2),
340 	0x00000000,
341 	(0x9c00 << 16) | (0xac10 >> 2),
342 	0x00000000,
343 	(0x9c00 << 16) | (0xac14 >> 2),
344 	0x00000000,
345 	(0x9c00 << 16) | (0xae00 >> 2),
346 	0x00000000,
347 	(0x9c00 << 16) | (0xac08 >> 2),
348 	0x00000000,
349 	(0x9c00 << 16) | (0x88d4 >> 2),
350 	0x00000000,
351 	(0x9c00 << 16) | (0x88c8 >> 2),
352 	0x00000000,
353 	(0x9c00 << 16) | (0x88cc >> 2),
354 	0x00000000,
355 	(0x9c00 << 16) | (0x89b0 >> 2),
356 	0x00000000,
357 	(0x9c00 << 16) | (0x8b10 >> 2),
358 	0x00000000,
359 	(0x9c00 << 16) | (0x8a14 >> 2),
360 	0x00000000,
361 	(0x9c00 << 16) | (0x9830 >> 2),
362 	0x00000000,
363 	(0x9c00 << 16) | (0x9834 >> 2),
364 	0x00000000,
365 	(0x9c00 << 16) | (0x9838 >> 2),
366 	0x00000000,
367 	(0x9c00 << 16) | (0x9a10 >> 2),
368 	0x00000000,
369 	(0x8000 << 16) | (0x9870 >> 2),
370 	0x00000000,
371 	(0x8000 << 16) | (0x9874 >> 2),
372 	0x00000000,
373 	(0x8001 << 16) | (0x9870 >> 2),
374 	0x00000000,
375 	(0x8001 << 16) | (0x9874 >> 2),
376 	0x00000000,
377 	(0x8040 << 16) | (0x9870 >> 2),
378 	0x00000000,
379 	(0x8040 << 16) | (0x9874 >> 2),
380 	0x00000000,
381 	(0x8041 << 16) | (0x9870 >> 2),
382 	0x00000000,
383 	(0x8041 << 16) | (0x9874 >> 2),
384 	0x00000000,
385 	0x00000000
386 };
387 
388 static const u32 tahiti_golden_rlc_registers[] =
389 {
390 	0xc424, 0xffffffff, 0x00601005,
391 	0xc47c, 0xffffffff, 0x10104040,
392 	0xc488, 0xffffffff, 0x0100000a,
393 	0xc314, 0xffffffff, 0x00000800,
394 	0xc30c, 0xffffffff, 0x800000f4,
395 	0xf4a8, 0xffffffff, 0x00000000
396 };
397 
398 static const u32 tahiti_golden_registers[] =
399 {
400 	0x9a10, 0x00010000, 0x00018208,
401 	0x9830, 0xffffffff, 0x00000000,
402 	0x9834, 0xf00fffff, 0x00000400,
403 	0x9838, 0x0002021c, 0x00020200,
404 	0xc78, 0x00000080, 0x00000000,
405 	0xd030, 0x000300c0, 0x00800040,
406 	0xd830, 0x000300c0, 0x00800040,
407 	0x5bb0, 0x000000f0, 0x00000070,
408 	0x5bc0, 0x00200000, 0x50100000,
409 	0x7030, 0x31000311, 0x00000011,
410 	0x277c, 0x00000003, 0x000007ff,
411 	0x240c, 0x000007ff, 0x00000000,
412 	0x8a14, 0xf000001f, 0x00000007,
413 	0x8b24, 0xffffffff, 0x00ffffff,
414 	0x8b10, 0x0000ff0f, 0x00000000,
415 	0x28a4c, 0x07ffffff, 0x4e000000,
416 	0x28350, 0x3f3f3fff, 0x2a00126a,
417 	0x30, 0x000000ff, 0x0040,
418 	0x34, 0x00000040, 0x00004040,
419 	0x9100, 0x07ffffff, 0x03000000,
420 	0x8e88, 0x01ff1f3f, 0x00000000,
421 	0x8e84, 0x01ff1f3f, 0x00000000,
422 	0x9060, 0x0000007f, 0x00000020,
423 	0x9508, 0x00010000, 0x00010000,
424 	0xac14, 0x00000200, 0x000002fb,
425 	0xac10, 0xffffffff, 0x0000543b,
426 	0xac0c, 0xffffffff, 0xa9210876,
427 	0x88d0, 0xffffffff, 0x000fff40,
428 	0x88d4, 0x0000001f, 0x00000010,
429 	0x1410, 0x20000000, 0x20fffed8,
430 	0x15c0, 0x000c0fc0, 0x000c0400
431 };
432 
433 static const u32 tahiti_golden_registers2[] =
434 {
435 	0xc64, 0x00000001, 0x00000001
436 };
437 
438 static const u32 pitcairn_golden_rlc_registers[] =
439 {
440 	0xc424, 0xffffffff, 0x00601004,
441 	0xc47c, 0xffffffff, 0x10102020,
442 	0xc488, 0xffffffff, 0x01000020,
443 	0xc314, 0xffffffff, 0x00000800,
444 	0xc30c, 0xffffffff, 0x800000a4
445 };
446 
447 static const u32 pitcairn_golden_registers[] =
448 {
449 	0x9a10, 0x00010000, 0x00018208,
450 	0x9830, 0xffffffff, 0x00000000,
451 	0x9834, 0xf00fffff, 0x00000400,
452 	0x9838, 0x0002021c, 0x00020200,
453 	0xc78, 0x00000080, 0x00000000,
454 	0xd030, 0x000300c0, 0x00800040,
455 	0xd830, 0x000300c0, 0x00800040,
456 	0x5bb0, 0x000000f0, 0x00000070,
457 	0x5bc0, 0x00200000, 0x50100000,
458 	0x7030, 0x31000311, 0x00000011,
459 	0x2ae4, 0x00073ffe, 0x000022a2,
460 	0x240c, 0x000007ff, 0x00000000,
461 	0x8a14, 0xf000001f, 0x00000007,
462 	0x8b24, 0xffffffff, 0x00ffffff,
463 	0x8b10, 0x0000ff0f, 0x00000000,
464 	0x28a4c, 0x07ffffff, 0x4e000000,
465 	0x28350, 0x3f3f3fff, 0x2a00126a,
466 	0x30, 0x000000ff, 0x0040,
467 	0x34, 0x00000040, 0x00004040,
468 	0x9100, 0x07ffffff, 0x03000000,
469 	0x9060, 0x0000007f, 0x00000020,
470 	0x9508, 0x00010000, 0x00010000,
471 	0xac14, 0x000003ff, 0x000000f7,
472 	0xac10, 0xffffffff, 0x00000000,
473 	0xac0c, 0xffffffff, 0x32761054,
474 	0x88d4, 0x0000001f, 0x00000010,
475 	0x15c0, 0x000c0fc0, 0x000c0400
476 };
477 
478 static const u32 verde_golden_rlc_registers[] =
479 {
480 	0xc424, 0xffffffff, 0x033f1005,
481 	0xc47c, 0xffffffff, 0x10808020,
482 	0xc488, 0xffffffff, 0x00800008,
483 	0xc314, 0xffffffff, 0x00001000,
484 	0xc30c, 0xffffffff, 0x80010014
485 };
486 
487 static const u32 verde_golden_registers[] =
488 {
489 	0x9a10, 0x00010000, 0x00018208,
490 	0x9830, 0xffffffff, 0x00000000,
491 	0x9834, 0xf00fffff, 0x00000400,
492 	0x9838, 0x0002021c, 0x00020200,
493 	0xc78, 0x00000080, 0x00000000,
494 	0xd030, 0x000300c0, 0x00800040,
495 	0xd030, 0x000300c0, 0x00800040,
496 	0xd830, 0x000300c0, 0x00800040,
497 	0xd830, 0x000300c0, 0x00800040,
498 	0x5bb0, 0x000000f0, 0x00000070,
499 	0x5bc0, 0x00200000, 0x50100000,
500 	0x7030, 0x31000311, 0x00000011,
501 	0x2ae4, 0x00073ffe, 0x000022a2,
502 	0x2ae4, 0x00073ffe, 0x000022a2,
503 	0x2ae4, 0x00073ffe, 0x000022a2,
504 	0x240c, 0x000007ff, 0x00000000,
505 	0x240c, 0x000007ff, 0x00000000,
506 	0x240c, 0x000007ff, 0x00000000,
507 	0x8a14, 0xf000001f, 0x00000007,
508 	0x8a14, 0xf000001f, 0x00000007,
509 	0x8a14, 0xf000001f, 0x00000007,
510 	0x8b24, 0xffffffff, 0x00ffffff,
511 	0x8b10, 0x0000ff0f, 0x00000000,
512 	0x28a4c, 0x07ffffff, 0x4e000000,
513 	0x28350, 0x3f3f3fff, 0x0000124a,
514 	0x28350, 0x3f3f3fff, 0x0000124a,
515 	0x28350, 0x3f3f3fff, 0x0000124a,
516 	0x30, 0x000000ff, 0x0040,
517 	0x34, 0x00000040, 0x00004040,
518 	0x9100, 0x07ffffff, 0x03000000,
519 	0x9100, 0x07ffffff, 0x03000000,
520 	0x8e88, 0x01ff1f3f, 0x00000000,
521 	0x8e88, 0x01ff1f3f, 0x00000000,
522 	0x8e88, 0x01ff1f3f, 0x00000000,
523 	0x8e84, 0x01ff1f3f, 0x00000000,
524 	0x8e84, 0x01ff1f3f, 0x00000000,
525 	0x8e84, 0x01ff1f3f, 0x00000000,
526 	0x9060, 0x0000007f, 0x00000020,
527 	0x9508, 0x00010000, 0x00010000,
528 	0xac14, 0x000003ff, 0x00000003,
529 	0xac14, 0x000003ff, 0x00000003,
530 	0xac14, 0x000003ff, 0x00000003,
531 	0xac10, 0xffffffff, 0x00000000,
532 	0xac10, 0xffffffff, 0x00000000,
533 	0xac10, 0xffffffff, 0x00000000,
534 	0xac0c, 0xffffffff, 0x00001032,
535 	0xac0c, 0xffffffff, 0x00001032,
536 	0xac0c, 0xffffffff, 0x00001032,
537 	0x88d4, 0x0000001f, 0x00000010,
538 	0x88d4, 0x0000001f, 0x00000010,
539 	0x88d4, 0x0000001f, 0x00000010,
540 	0x15c0, 0x000c0fc0, 0x000c0400
541 };
542 
543 static const u32 oland_golden_rlc_registers[] =
544 {
545 	0xc424, 0xffffffff, 0x00601005,
546 	0xc47c, 0xffffffff, 0x10104040,
547 	0xc488, 0xffffffff, 0x0100000a,
548 	0xc314, 0xffffffff, 0x00000800,
549 	0xc30c, 0xffffffff, 0x800000f4
550 };
551 
552 static const u32 oland_golden_registers[] =
553 {
554 	0x9a10, 0x00010000, 0x00018208,
555 	0x9830, 0xffffffff, 0x00000000,
556 	0x9834, 0xf00fffff, 0x00000400,
557 	0x9838, 0x0002021c, 0x00020200,
558 	0xc78, 0x00000080, 0x00000000,
559 	0xd030, 0x000300c0, 0x00800040,
560 	0xd830, 0x000300c0, 0x00800040,
561 	0x5bb0, 0x000000f0, 0x00000070,
562 	0x5bc0, 0x00200000, 0x50100000,
563 	0x7030, 0x31000311, 0x00000011,
564 	0x2ae4, 0x00073ffe, 0x000022a2,
565 	0x240c, 0x000007ff, 0x00000000,
566 	0x8a14, 0xf000001f, 0x00000007,
567 	0x8b24, 0xffffffff, 0x00ffffff,
568 	0x8b10, 0x0000ff0f, 0x00000000,
569 	0x28a4c, 0x07ffffff, 0x4e000000,
570 	0x28350, 0x3f3f3fff, 0x00000082,
571 	0x30, 0x000000ff, 0x0040,
572 	0x34, 0x00000040, 0x00004040,
573 	0x9100, 0x07ffffff, 0x03000000,
574 	0x9060, 0x0000007f, 0x00000020,
575 	0x9508, 0x00010000, 0x00010000,
576 	0xac14, 0x000003ff, 0x000000f3,
577 	0xac10, 0xffffffff, 0x00000000,
578 	0xac0c, 0xffffffff, 0x00003210,
579 	0x88d4, 0x0000001f, 0x00000010,
580 	0x15c0, 0x000c0fc0, 0x000c0400
581 };
582 
583 static const u32 hainan_golden_registers[] =
584 {
585 	0x9a10, 0x00010000, 0x00018208,
586 	0x9830, 0xffffffff, 0x00000000,
587 	0x9834, 0xf00fffff, 0x00000400,
588 	0x9838, 0x0002021c, 0x00020200,
589 	0xd0c0, 0xff000fff, 0x00000100,
590 	0xd030, 0x000300c0, 0x00800040,
591 	0xd8c0, 0xff000fff, 0x00000100,
592 	0xd830, 0x000300c0, 0x00800040,
593 	0x2ae4, 0x00073ffe, 0x000022a2,
594 	0x240c, 0x000007ff, 0x00000000,
595 	0x8a14, 0xf000001f, 0x00000007,
596 	0x8b24, 0xffffffff, 0x00ffffff,
597 	0x8b10, 0x0000ff0f, 0x00000000,
598 	0x28a4c, 0x07ffffff, 0x4e000000,
599 	0x28350, 0x3f3f3fff, 0x00000000,
600 	0x30, 0x000000ff, 0x0040,
601 	0x34, 0x00000040, 0x00004040,
602 	0x9100, 0x03e00000, 0x03600000,
603 	0x9060, 0x0000007f, 0x00000020,
604 	0x9508, 0x00010000, 0x00010000,
605 	0xac14, 0x000003ff, 0x000000f1,
606 	0xac10, 0xffffffff, 0x00000000,
607 	0xac0c, 0xffffffff, 0x00003210,
608 	0x88d4, 0x0000001f, 0x00000010,
609 	0x15c0, 0x000c0fc0, 0x000c0400
610 };
611 
612 static const u32 hainan_golden_registers2[] =
613 {
614 	0x98f8, 0xffffffff, 0x02010001
615 };
616 
617 static const u32 tahiti_mgcg_cgcg_init[] =
618 {
619 	0xc400, 0xffffffff, 0xfffffffc,
620 	0x802c, 0xffffffff, 0xe0000000,
621 	0x9a60, 0xffffffff, 0x00000100,
622 	0x92a4, 0xffffffff, 0x00000100,
623 	0xc164, 0xffffffff, 0x00000100,
624 	0x9774, 0xffffffff, 0x00000100,
625 	0x8984, 0xffffffff, 0x06000100,
626 	0x8a18, 0xffffffff, 0x00000100,
627 	0x92a0, 0xffffffff, 0x00000100,
628 	0xc380, 0xffffffff, 0x00000100,
629 	0x8b28, 0xffffffff, 0x00000100,
630 	0x9144, 0xffffffff, 0x00000100,
631 	0x8d88, 0xffffffff, 0x00000100,
632 	0x8d8c, 0xffffffff, 0x00000100,
633 	0x9030, 0xffffffff, 0x00000100,
634 	0x9034, 0xffffffff, 0x00000100,
635 	0x9038, 0xffffffff, 0x00000100,
636 	0x903c, 0xffffffff, 0x00000100,
637 	0xad80, 0xffffffff, 0x00000100,
638 	0xac54, 0xffffffff, 0x00000100,
639 	0x897c, 0xffffffff, 0x06000100,
640 	0x9868, 0xffffffff, 0x00000100,
641 	0x9510, 0xffffffff, 0x00000100,
642 	0xaf04, 0xffffffff, 0x00000100,
643 	0xae04, 0xffffffff, 0x00000100,
644 	0x949c, 0xffffffff, 0x00000100,
645 	0x802c, 0xffffffff, 0xe0000000,
646 	0x9160, 0xffffffff, 0x00010000,
647 	0x9164, 0xffffffff, 0x00030002,
648 	0x9168, 0xffffffff, 0x00040007,
649 	0x916c, 0xffffffff, 0x00060005,
650 	0x9170, 0xffffffff, 0x00090008,
651 	0x9174, 0xffffffff, 0x00020001,
652 	0x9178, 0xffffffff, 0x00040003,
653 	0x917c, 0xffffffff, 0x00000007,
654 	0x9180, 0xffffffff, 0x00060005,
655 	0x9184, 0xffffffff, 0x00090008,
656 	0x9188, 0xffffffff, 0x00030002,
657 	0x918c, 0xffffffff, 0x00050004,
658 	0x9190, 0xffffffff, 0x00000008,
659 	0x9194, 0xffffffff, 0x00070006,
660 	0x9198, 0xffffffff, 0x000a0009,
661 	0x919c, 0xffffffff, 0x00040003,
662 	0x91a0, 0xffffffff, 0x00060005,
663 	0x91a4, 0xffffffff, 0x00000009,
664 	0x91a8, 0xffffffff, 0x00080007,
665 	0x91ac, 0xffffffff, 0x000b000a,
666 	0x91b0, 0xffffffff, 0x00050004,
667 	0x91b4, 0xffffffff, 0x00070006,
668 	0x91b8, 0xffffffff, 0x0008000b,
669 	0x91bc, 0xffffffff, 0x000a0009,
670 	0x91c0, 0xffffffff, 0x000d000c,
671 	0x91c4, 0xffffffff, 0x00060005,
672 	0x91c8, 0xffffffff, 0x00080007,
673 	0x91cc, 0xffffffff, 0x0000000b,
674 	0x91d0, 0xffffffff, 0x000a0009,
675 	0x91d4, 0xffffffff, 0x000d000c,
676 	0x91d8, 0xffffffff, 0x00070006,
677 	0x91dc, 0xffffffff, 0x00090008,
678 	0x91e0, 0xffffffff, 0x0000000c,
679 	0x91e4, 0xffffffff, 0x000b000a,
680 	0x91e8, 0xffffffff, 0x000e000d,
681 	0x91ec, 0xffffffff, 0x00080007,
682 	0x91f0, 0xffffffff, 0x000a0009,
683 	0x91f4, 0xffffffff, 0x0000000d,
684 	0x91f8, 0xffffffff, 0x000c000b,
685 	0x91fc, 0xffffffff, 0x000f000e,
686 	0x9200, 0xffffffff, 0x00090008,
687 	0x9204, 0xffffffff, 0x000b000a,
688 	0x9208, 0xffffffff, 0x000c000f,
689 	0x920c, 0xffffffff, 0x000e000d,
690 	0x9210, 0xffffffff, 0x00110010,
691 	0x9214, 0xffffffff, 0x000a0009,
692 	0x9218, 0xffffffff, 0x000c000b,
693 	0x921c, 0xffffffff, 0x0000000f,
694 	0x9220, 0xffffffff, 0x000e000d,
695 	0x9224, 0xffffffff, 0x00110010,
696 	0x9228, 0xffffffff, 0x000b000a,
697 	0x922c, 0xffffffff, 0x000d000c,
698 	0x9230, 0xffffffff, 0x00000010,
699 	0x9234, 0xffffffff, 0x000f000e,
700 	0x9238, 0xffffffff, 0x00120011,
701 	0x923c, 0xffffffff, 0x000c000b,
702 	0x9240, 0xffffffff, 0x000e000d,
703 	0x9244, 0xffffffff, 0x00000011,
704 	0x9248, 0xffffffff, 0x0010000f,
705 	0x924c, 0xffffffff, 0x00130012,
706 	0x9250, 0xffffffff, 0x000d000c,
707 	0x9254, 0xffffffff, 0x000f000e,
708 	0x9258, 0xffffffff, 0x00100013,
709 	0x925c, 0xffffffff, 0x00120011,
710 	0x9260, 0xffffffff, 0x00150014,
711 	0x9264, 0xffffffff, 0x000e000d,
712 	0x9268, 0xffffffff, 0x0010000f,
713 	0x926c, 0xffffffff, 0x00000013,
714 	0x9270, 0xffffffff, 0x00120011,
715 	0x9274, 0xffffffff, 0x00150014,
716 	0x9278, 0xffffffff, 0x000f000e,
717 	0x927c, 0xffffffff, 0x00110010,
718 	0x9280, 0xffffffff, 0x00000014,
719 	0x9284, 0xffffffff, 0x00130012,
720 	0x9288, 0xffffffff, 0x00160015,
721 	0x928c, 0xffffffff, 0x0010000f,
722 	0x9290, 0xffffffff, 0x00120011,
723 	0x9294, 0xffffffff, 0x00000015,
724 	0x9298, 0xffffffff, 0x00140013,
725 	0x929c, 0xffffffff, 0x00170016,
726 	0x9150, 0xffffffff, 0x96940200,
727 	0x8708, 0xffffffff, 0x00900100,
728 	0xc478, 0xffffffff, 0x00000080,
729 	0xc404, 0xffffffff, 0x0020003f,
730 	0x30, 0xffffffff, 0x0000001c,
731 	0x34, 0x000f0000, 0x000f0000,
732 	0x160c, 0xffffffff, 0x00000100,
733 	0x1024, 0xffffffff, 0x00000100,
734 	0x102c, 0x00000101, 0x00000000,
735 	0x20a8, 0xffffffff, 0x00000104,
736 	0x264c, 0x000c0000, 0x000c0000,
737 	0x2648, 0x000c0000, 0x000c0000,
738 	0x55e4, 0xff000fff, 0x00000100,
739 	0x55e8, 0x00000001, 0x00000001,
740 	0x2f50, 0x00000001, 0x00000001,
741 	0x30cc, 0xc0000fff, 0x00000104,
742 	0xc1e4, 0x00000001, 0x00000001,
743 	0xd0c0, 0xfffffff0, 0x00000100,
744 	0xd8c0, 0xfffffff0, 0x00000100
745 };
746 
747 static const u32 pitcairn_mgcg_cgcg_init[] =
748 {
749 	0xc400, 0xffffffff, 0xfffffffc,
750 	0x802c, 0xffffffff, 0xe0000000,
751 	0x9a60, 0xffffffff, 0x00000100,
752 	0x92a4, 0xffffffff, 0x00000100,
753 	0xc164, 0xffffffff, 0x00000100,
754 	0x9774, 0xffffffff, 0x00000100,
755 	0x8984, 0xffffffff, 0x06000100,
756 	0x8a18, 0xffffffff, 0x00000100,
757 	0x92a0, 0xffffffff, 0x00000100,
758 	0xc380, 0xffffffff, 0x00000100,
759 	0x8b28, 0xffffffff, 0x00000100,
760 	0x9144, 0xffffffff, 0x00000100,
761 	0x8d88, 0xffffffff, 0x00000100,
762 	0x8d8c, 0xffffffff, 0x00000100,
763 	0x9030, 0xffffffff, 0x00000100,
764 	0x9034, 0xffffffff, 0x00000100,
765 	0x9038, 0xffffffff, 0x00000100,
766 	0x903c, 0xffffffff, 0x00000100,
767 	0xad80, 0xffffffff, 0x00000100,
768 	0xac54, 0xffffffff, 0x00000100,
769 	0x897c, 0xffffffff, 0x06000100,
770 	0x9868, 0xffffffff, 0x00000100,
771 	0x9510, 0xffffffff, 0x00000100,
772 	0xaf04, 0xffffffff, 0x00000100,
773 	0xae04, 0xffffffff, 0x00000100,
774 	0x949c, 0xffffffff, 0x00000100,
775 	0x802c, 0xffffffff, 0xe0000000,
776 	0x9160, 0xffffffff, 0x00010000,
777 	0x9164, 0xffffffff, 0x00030002,
778 	0x9168, 0xffffffff, 0x00040007,
779 	0x916c, 0xffffffff, 0x00060005,
780 	0x9170, 0xffffffff, 0x00090008,
781 	0x9174, 0xffffffff, 0x00020001,
782 	0x9178, 0xffffffff, 0x00040003,
783 	0x917c, 0xffffffff, 0x00000007,
784 	0x9180, 0xffffffff, 0x00060005,
785 	0x9184, 0xffffffff, 0x00090008,
786 	0x9188, 0xffffffff, 0x00030002,
787 	0x918c, 0xffffffff, 0x00050004,
788 	0x9190, 0xffffffff, 0x00000008,
789 	0x9194, 0xffffffff, 0x00070006,
790 	0x9198, 0xffffffff, 0x000a0009,
791 	0x919c, 0xffffffff, 0x00040003,
792 	0x91a0, 0xffffffff, 0x00060005,
793 	0x91a4, 0xffffffff, 0x00000009,
794 	0x91a8, 0xffffffff, 0x00080007,
795 	0x91ac, 0xffffffff, 0x000b000a,
796 	0x91b0, 0xffffffff, 0x00050004,
797 	0x91b4, 0xffffffff, 0x00070006,
798 	0x91b8, 0xffffffff, 0x0008000b,
799 	0x91bc, 0xffffffff, 0x000a0009,
800 	0x91c0, 0xffffffff, 0x000d000c,
801 	0x9200, 0xffffffff, 0x00090008,
802 	0x9204, 0xffffffff, 0x000b000a,
803 	0x9208, 0xffffffff, 0x000c000f,
804 	0x920c, 0xffffffff, 0x000e000d,
805 	0x9210, 0xffffffff, 0x00110010,
806 	0x9214, 0xffffffff, 0x000a0009,
807 	0x9218, 0xffffffff, 0x000c000b,
808 	0x921c, 0xffffffff, 0x0000000f,
809 	0x9220, 0xffffffff, 0x000e000d,
810 	0x9224, 0xffffffff, 0x00110010,
811 	0x9228, 0xffffffff, 0x000b000a,
812 	0x922c, 0xffffffff, 0x000d000c,
813 	0x9230, 0xffffffff, 0x00000010,
814 	0x9234, 0xffffffff, 0x000f000e,
815 	0x9238, 0xffffffff, 0x00120011,
816 	0x923c, 0xffffffff, 0x000c000b,
817 	0x9240, 0xffffffff, 0x000e000d,
818 	0x9244, 0xffffffff, 0x00000011,
819 	0x9248, 0xffffffff, 0x0010000f,
820 	0x924c, 0xffffffff, 0x00130012,
821 	0x9250, 0xffffffff, 0x000d000c,
822 	0x9254, 0xffffffff, 0x000f000e,
823 	0x9258, 0xffffffff, 0x00100013,
824 	0x925c, 0xffffffff, 0x00120011,
825 	0x9260, 0xffffffff, 0x00150014,
826 	0x9150, 0xffffffff, 0x96940200,
827 	0x8708, 0xffffffff, 0x00900100,
828 	0xc478, 0xffffffff, 0x00000080,
829 	0xc404, 0xffffffff, 0x0020003f,
830 	0x30, 0xffffffff, 0x0000001c,
831 	0x34, 0x000f0000, 0x000f0000,
832 	0x160c, 0xffffffff, 0x00000100,
833 	0x1024, 0xffffffff, 0x00000100,
834 	0x102c, 0x00000101, 0x00000000,
835 	0x20a8, 0xffffffff, 0x00000104,
836 	0x55e4, 0xff000fff, 0x00000100,
837 	0x55e8, 0x00000001, 0x00000001,
838 	0x2f50, 0x00000001, 0x00000001,
839 	0x30cc, 0xc0000fff, 0x00000104,
840 	0xc1e4, 0x00000001, 0x00000001,
841 	0xd0c0, 0xfffffff0, 0x00000100,
842 	0xd8c0, 0xfffffff0, 0x00000100
843 };
844 
845 static const u32 verde_mgcg_cgcg_init[] =
846 {
847 	0xc400, 0xffffffff, 0xfffffffc,
848 	0x802c, 0xffffffff, 0xe0000000,
849 	0x9a60, 0xffffffff, 0x00000100,
850 	0x92a4, 0xffffffff, 0x00000100,
851 	0xc164, 0xffffffff, 0x00000100,
852 	0x9774, 0xffffffff, 0x00000100,
853 	0x8984, 0xffffffff, 0x06000100,
854 	0x8a18, 0xffffffff, 0x00000100,
855 	0x92a0, 0xffffffff, 0x00000100,
856 	0xc380, 0xffffffff, 0x00000100,
857 	0x8b28, 0xffffffff, 0x00000100,
858 	0x9144, 0xffffffff, 0x00000100,
859 	0x8d88, 0xffffffff, 0x00000100,
860 	0x8d8c, 0xffffffff, 0x00000100,
861 	0x9030, 0xffffffff, 0x00000100,
862 	0x9034, 0xffffffff, 0x00000100,
863 	0x9038, 0xffffffff, 0x00000100,
864 	0x903c, 0xffffffff, 0x00000100,
865 	0xad80, 0xffffffff, 0x00000100,
866 	0xac54, 0xffffffff, 0x00000100,
867 	0x897c, 0xffffffff, 0x06000100,
868 	0x9868, 0xffffffff, 0x00000100,
869 	0x9510, 0xffffffff, 0x00000100,
870 	0xaf04, 0xffffffff, 0x00000100,
871 	0xae04, 0xffffffff, 0x00000100,
872 	0x949c, 0xffffffff, 0x00000100,
873 	0x802c, 0xffffffff, 0xe0000000,
874 	0x9160, 0xffffffff, 0x00010000,
875 	0x9164, 0xffffffff, 0x00030002,
876 	0x9168, 0xffffffff, 0x00040007,
877 	0x916c, 0xffffffff, 0x00060005,
878 	0x9170, 0xffffffff, 0x00090008,
879 	0x9174, 0xffffffff, 0x00020001,
880 	0x9178, 0xffffffff, 0x00040003,
881 	0x917c, 0xffffffff, 0x00000007,
882 	0x9180, 0xffffffff, 0x00060005,
883 	0x9184, 0xffffffff, 0x00090008,
884 	0x9188, 0xffffffff, 0x00030002,
885 	0x918c, 0xffffffff, 0x00050004,
886 	0x9190, 0xffffffff, 0x00000008,
887 	0x9194, 0xffffffff, 0x00070006,
888 	0x9198, 0xffffffff, 0x000a0009,
889 	0x919c, 0xffffffff, 0x00040003,
890 	0x91a0, 0xffffffff, 0x00060005,
891 	0x91a4, 0xffffffff, 0x00000009,
892 	0x91a8, 0xffffffff, 0x00080007,
893 	0x91ac, 0xffffffff, 0x000b000a,
894 	0x91b0, 0xffffffff, 0x00050004,
895 	0x91b4, 0xffffffff, 0x00070006,
896 	0x91b8, 0xffffffff, 0x0008000b,
897 	0x91bc, 0xffffffff, 0x000a0009,
898 	0x91c0, 0xffffffff, 0x000d000c,
899 	0x9200, 0xffffffff, 0x00090008,
900 	0x9204, 0xffffffff, 0x000b000a,
901 	0x9208, 0xffffffff, 0x000c000f,
902 	0x920c, 0xffffffff, 0x000e000d,
903 	0x9210, 0xffffffff, 0x00110010,
904 	0x9214, 0xffffffff, 0x000a0009,
905 	0x9218, 0xffffffff, 0x000c000b,
906 	0x921c, 0xffffffff, 0x0000000f,
907 	0x9220, 0xffffffff, 0x000e000d,
908 	0x9224, 0xffffffff, 0x00110010,
909 	0x9228, 0xffffffff, 0x000b000a,
910 	0x922c, 0xffffffff, 0x000d000c,
911 	0x9230, 0xffffffff, 0x00000010,
912 	0x9234, 0xffffffff, 0x000f000e,
913 	0x9238, 0xffffffff, 0x00120011,
914 	0x923c, 0xffffffff, 0x000c000b,
915 	0x9240, 0xffffffff, 0x000e000d,
916 	0x9244, 0xffffffff, 0x00000011,
917 	0x9248, 0xffffffff, 0x0010000f,
918 	0x924c, 0xffffffff, 0x00130012,
919 	0x9250, 0xffffffff, 0x000d000c,
920 	0x9254, 0xffffffff, 0x000f000e,
921 	0x9258, 0xffffffff, 0x00100013,
922 	0x925c, 0xffffffff, 0x00120011,
923 	0x9260, 0xffffffff, 0x00150014,
924 	0x9150, 0xffffffff, 0x96940200,
925 	0x8708, 0xffffffff, 0x00900100,
926 	0xc478, 0xffffffff, 0x00000080,
927 	0xc404, 0xffffffff, 0x0020003f,
928 	0x30, 0xffffffff, 0x0000001c,
929 	0x34, 0x000f0000, 0x000f0000,
930 	0x160c, 0xffffffff, 0x00000100,
931 	0x1024, 0xffffffff, 0x00000100,
932 	0x102c, 0x00000101, 0x00000000,
933 	0x20a8, 0xffffffff, 0x00000104,
934 	0x264c, 0x000c0000, 0x000c0000,
935 	0x2648, 0x000c0000, 0x000c0000,
936 	0x55e4, 0xff000fff, 0x00000100,
937 	0x55e8, 0x00000001, 0x00000001,
938 	0x2f50, 0x00000001, 0x00000001,
939 	0x30cc, 0xc0000fff, 0x00000104,
940 	0xc1e4, 0x00000001, 0x00000001,
941 	0xd0c0, 0xfffffff0, 0x00000100,
942 	0xd8c0, 0xfffffff0, 0x00000100
943 };
944 
945 static const u32 oland_mgcg_cgcg_init[] =
946 {
947 	0xc400, 0xffffffff, 0xfffffffc,
948 	0x802c, 0xffffffff, 0xe0000000,
949 	0x9a60, 0xffffffff, 0x00000100,
950 	0x92a4, 0xffffffff, 0x00000100,
951 	0xc164, 0xffffffff, 0x00000100,
952 	0x9774, 0xffffffff, 0x00000100,
953 	0x8984, 0xffffffff, 0x06000100,
954 	0x8a18, 0xffffffff, 0x00000100,
955 	0x92a0, 0xffffffff, 0x00000100,
956 	0xc380, 0xffffffff, 0x00000100,
957 	0x8b28, 0xffffffff, 0x00000100,
958 	0x9144, 0xffffffff, 0x00000100,
959 	0x8d88, 0xffffffff, 0x00000100,
960 	0x8d8c, 0xffffffff, 0x00000100,
961 	0x9030, 0xffffffff, 0x00000100,
962 	0x9034, 0xffffffff, 0x00000100,
963 	0x9038, 0xffffffff, 0x00000100,
964 	0x903c, 0xffffffff, 0x00000100,
965 	0xad80, 0xffffffff, 0x00000100,
966 	0xac54, 0xffffffff, 0x00000100,
967 	0x897c, 0xffffffff, 0x06000100,
968 	0x9868, 0xffffffff, 0x00000100,
969 	0x9510, 0xffffffff, 0x00000100,
970 	0xaf04, 0xffffffff, 0x00000100,
971 	0xae04, 0xffffffff, 0x00000100,
972 	0x949c, 0xffffffff, 0x00000100,
973 	0x802c, 0xffffffff, 0xe0000000,
974 	0x9160, 0xffffffff, 0x00010000,
975 	0x9164, 0xffffffff, 0x00030002,
976 	0x9168, 0xffffffff, 0x00040007,
977 	0x916c, 0xffffffff, 0x00060005,
978 	0x9170, 0xffffffff, 0x00090008,
979 	0x9174, 0xffffffff, 0x00020001,
980 	0x9178, 0xffffffff, 0x00040003,
981 	0x917c, 0xffffffff, 0x00000007,
982 	0x9180, 0xffffffff, 0x00060005,
983 	0x9184, 0xffffffff, 0x00090008,
984 	0x9188, 0xffffffff, 0x00030002,
985 	0x918c, 0xffffffff, 0x00050004,
986 	0x9190, 0xffffffff, 0x00000008,
987 	0x9194, 0xffffffff, 0x00070006,
988 	0x9198, 0xffffffff, 0x000a0009,
989 	0x919c, 0xffffffff, 0x00040003,
990 	0x91a0, 0xffffffff, 0x00060005,
991 	0x91a4, 0xffffffff, 0x00000009,
992 	0x91a8, 0xffffffff, 0x00080007,
993 	0x91ac, 0xffffffff, 0x000b000a,
994 	0x91b0, 0xffffffff, 0x00050004,
995 	0x91b4, 0xffffffff, 0x00070006,
996 	0x91b8, 0xffffffff, 0x0008000b,
997 	0x91bc, 0xffffffff, 0x000a0009,
998 	0x91c0, 0xffffffff, 0x000d000c,
999 	0x91c4, 0xffffffff, 0x00060005,
1000 	0x91c8, 0xffffffff, 0x00080007,
1001 	0x91cc, 0xffffffff, 0x0000000b,
1002 	0x91d0, 0xffffffff, 0x000a0009,
1003 	0x91d4, 0xffffffff, 0x000d000c,
1004 	0x9150, 0xffffffff, 0x96940200,
1005 	0x8708, 0xffffffff, 0x00900100,
1006 	0xc478, 0xffffffff, 0x00000080,
1007 	0xc404, 0xffffffff, 0x0020003f,
1008 	0x30, 0xffffffff, 0x0000001c,
1009 	0x34, 0x000f0000, 0x000f0000,
1010 	0x160c, 0xffffffff, 0x00000100,
1011 	0x1024, 0xffffffff, 0x00000100,
1012 	0x102c, 0x00000101, 0x00000000,
1013 	0x20a8, 0xffffffff, 0x00000104,
1014 	0x264c, 0x000c0000, 0x000c0000,
1015 	0x2648, 0x000c0000, 0x000c0000,
1016 	0x55e4, 0xff000fff, 0x00000100,
1017 	0x55e8, 0x00000001, 0x00000001,
1018 	0x2f50, 0x00000001, 0x00000001,
1019 	0x30cc, 0xc0000fff, 0x00000104,
1020 	0xc1e4, 0x00000001, 0x00000001,
1021 	0xd0c0, 0xfffffff0, 0x00000100,
1022 	0xd8c0, 0xfffffff0, 0x00000100
1023 };
1024 
1025 static const u32 hainan_mgcg_cgcg_init[] =
1026 {
1027 	0xc400, 0xffffffff, 0xfffffffc,
1028 	0x802c, 0xffffffff, 0xe0000000,
1029 	0x9a60, 0xffffffff, 0x00000100,
1030 	0x92a4, 0xffffffff, 0x00000100,
1031 	0xc164, 0xffffffff, 0x00000100,
1032 	0x9774, 0xffffffff, 0x00000100,
1033 	0x8984, 0xffffffff, 0x06000100,
1034 	0x8a18, 0xffffffff, 0x00000100,
1035 	0x92a0, 0xffffffff, 0x00000100,
1036 	0xc380, 0xffffffff, 0x00000100,
1037 	0x8b28, 0xffffffff, 0x00000100,
1038 	0x9144, 0xffffffff, 0x00000100,
1039 	0x8d88, 0xffffffff, 0x00000100,
1040 	0x8d8c, 0xffffffff, 0x00000100,
1041 	0x9030, 0xffffffff, 0x00000100,
1042 	0x9034, 0xffffffff, 0x00000100,
1043 	0x9038, 0xffffffff, 0x00000100,
1044 	0x903c, 0xffffffff, 0x00000100,
1045 	0xad80, 0xffffffff, 0x00000100,
1046 	0xac54, 0xffffffff, 0x00000100,
1047 	0x897c, 0xffffffff, 0x06000100,
1048 	0x9868, 0xffffffff, 0x00000100,
1049 	0x9510, 0xffffffff, 0x00000100,
1050 	0xaf04, 0xffffffff, 0x00000100,
1051 	0xae04, 0xffffffff, 0x00000100,
1052 	0x949c, 0xffffffff, 0x00000100,
1053 	0x802c, 0xffffffff, 0xe0000000,
1054 	0x9160, 0xffffffff, 0x00010000,
1055 	0x9164, 0xffffffff, 0x00030002,
1056 	0x9168, 0xffffffff, 0x00040007,
1057 	0x916c, 0xffffffff, 0x00060005,
1058 	0x9170, 0xffffffff, 0x00090008,
1059 	0x9174, 0xffffffff, 0x00020001,
1060 	0x9178, 0xffffffff, 0x00040003,
1061 	0x917c, 0xffffffff, 0x00000007,
1062 	0x9180, 0xffffffff, 0x00060005,
1063 	0x9184, 0xffffffff, 0x00090008,
1064 	0x9188, 0xffffffff, 0x00030002,
1065 	0x918c, 0xffffffff, 0x00050004,
1066 	0x9190, 0xffffffff, 0x00000008,
1067 	0x9194, 0xffffffff, 0x00070006,
1068 	0x9198, 0xffffffff, 0x000a0009,
1069 	0x919c, 0xffffffff, 0x00040003,
1070 	0x91a0, 0xffffffff, 0x00060005,
1071 	0x91a4, 0xffffffff, 0x00000009,
1072 	0x91a8, 0xffffffff, 0x00080007,
1073 	0x91ac, 0xffffffff, 0x000b000a,
1074 	0x91b0, 0xffffffff, 0x00050004,
1075 	0x91b4, 0xffffffff, 0x00070006,
1076 	0x91b8, 0xffffffff, 0x0008000b,
1077 	0x91bc, 0xffffffff, 0x000a0009,
1078 	0x91c0, 0xffffffff, 0x000d000c,
1079 	0x91c4, 0xffffffff, 0x00060005,
1080 	0x91c8, 0xffffffff, 0x00080007,
1081 	0x91cc, 0xffffffff, 0x0000000b,
1082 	0x91d0, 0xffffffff, 0x000a0009,
1083 	0x91d4, 0xffffffff, 0x000d000c,
1084 	0x9150, 0xffffffff, 0x96940200,
1085 	0x8708, 0xffffffff, 0x00900100,
1086 	0xc478, 0xffffffff, 0x00000080,
1087 	0xc404, 0xffffffff, 0x0020003f,
1088 	0x30, 0xffffffff, 0x0000001c,
1089 	0x34, 0x000f0000, 0x000f0000,
1090 	0x160c, 0xffffffff, 0x00000100,
1091 	0x1024, 0xffffffff, 0x00000100,
1092 	0x20a8, 0xffffffff, 0x00000104,
1093 	0x264c, 0x000c0000, 0x000c0000,
1094 	0x2648, 0x000c0000, 0x000c0000,
1095 	0x2f50, 0x00000001, 0x00000001,
1096 	0x30cc, 0xc0000fff, 0x00000104,
1097 	0xc1e4, 0x00000001, 0x00000001,
1098 	0xd0c0, 0xfffffff0, 0x00000100,
1099 	0xd8c0, 0xfffffff0, 0x00000100
1100 };
1101 
1102 static u32 verde_pg_init[] =
1103 {
1104 	0x353c, 0xffffffff, 0x40000,
1105 	0x3538, 0xffffffff, 0x200010ff,
1106 	0x353c, 0xffffffff, 0x0,
1107 	0x353c, 0xffffffff, 0x0,
1108 	0x353c, 0xffffffff, 0x0,
1109 	0x353c, 0xffffffff, 0x0,
1110 	0x353c, 0xffffffff, 0x0,
1111 	0x353c, 0xffffffff, 0x7007,
1112 	0x3538, 0xffffffff, 0x300010ff,
1113 	0x353c, 0xffffffff, 0x0,
1114 	0x353c, 0xffffffff, 0x0,
1115 	0x353c, 0xffffffff, 0x0,
1116 	0x353c, 0xffffffff, 0x0,
1117 	0x353c, 0xffffffff, 0x0,
1118 	0x353c, 0xffffffff, 0x400000,
1119 	0x3538, 0xffffffff, 0x100010ff,
1120 	0x353c, 0xffffffff, 0x0,
1121 	0x353c, 0xffffffff, 0x0,
1122 	0x353c, 0xffffffff, 0x0,
1123 	0x353c, 0xffffffff, 0x0,
1124 	0x353c, 0xffffffff, 0x0,
1125 	0x353c, 0xffffffff, 0x120200,
1126 	0x3538, 0xffffffff, 0x500010ff,
1127 	0x353c, 0xffffffff, 0x0,
1128 	0x353c, 0xffffffff, 0x0,
1129 	0x353c, 0xffffffff, 0x0,
1130 	0x353c, 0xffffffff, 0x0,
1131 	0x353c, 0xffffffff, 0x0,
1132 	0x353c, 0xffffffff, 0x1e1e16,
1133 	0x3538, 0xffffffff, 0x600010ff,
1134 	0x353c, 0xffffffff, 0x0,
1135 	0x353c, 0xffffffff, 0x0,
1136 	0x353c, 0xffffffff, 0x0,
1137 	0x353c, 0xffffffff, 0x0,
1138 	0x353c, 0xffffffff, 0x0,
1139 	0x353c, 0xffffffff, 0x171f1e,
1140 	0x3538, 0xffffffff, 0x700010ff,
1141 	0x353c, 0xffffffff, 0x0,
1142 	0x353c, 0xffffffff, 0x0,
1143 	0x353c, 0xffffffff, 0x0,
1144 	0x353c, 0xffffffff, 0x0,
1145 	0x353c, 0xffffffff, 0x0,
1146 	0x353c, 0xffffffff, 0x0,
1147 	0x3538, 0xffffffff, 0x9ff,
1148 	0x3500, 0xffffffff, 0x0,
1149 	0x3504, 0xffffffff, 0x10000800,
1150 	0x3504, 0xffffffff, 0xf,
1151 	0x3504, 0xffffffff, 0xf,
1152 	0x3500, 0xffffffff, 0x4,
1153 	0x3504, 0xffffffff, 0x1000051e,
1154 	0x3504, 0xffffffff, 0xffff,
1155 	0x3504, 0xffffffff, 0xffff,
1156 	0x3500, 0xffffffff, 0x8,
1157 	0x3504, 0xffffffff, 0x80500,
1158 	0x3500, 0xffffffff, 0x12,
1159 	0x3504, 0xffffffff, 0x9050c,
1160 	0x3500, 0xffffffff, 0x1d,
1161 	0x3504, 0xffffffff, 0xb052c,
1162 	0x3500, 0xffffffff, 0x2a,
1163 	0x3504, 0xffffffff, 0x1053e,
1164 	0x3500, 0xffffffff, 0x2d,
1165 	0x3504, 0xffffffff, 0x10546,
1166 	0x3500, 0xffffffff, 0x30,
1167 	0x3504, 0xffffffff, 0xa054e,
1168 	0x3500, 0xffffffff, 0x3c,
1169 	0x3504, 0xffffffff, 0x1055f,
1170 	0x3500, 0xffffffff, 0x3f,
1171 	0x3504, 0xffffffff, 0x10567,
1172 	0x3500, 0xffffffff, 0x42,
1173 	0x3504, 0xffffffff, 0x1056f,
1174 	0x3500, 0xffffffff, 0x45,
1175 	0x3504, 0xffffffff, 0x10572,
1176 	0x3500, 0xffffffff, 0x48,
1177 	0x3504, 0xffffffff, 0x20575,
1178 	0x3500, 0xffffffff, 0x4c,
1179 	0x3504, 0xffffffff, 0x190801,
1180 	0x3500, 0xffffffff, 0x67,
1181 	0x3504, 0xffffffff, 0x1082a,
1182 	0x3500, 0xffffffff, 0x6a,
1183 	0x3504, 0xffffffff, 0x1b082d,
1184 	0x3500, 0xffffffff, 0x87,
1185 	0x3504, 0xffffffff, 0x310851,
1186 	0x3500, 0xffffffff, 0xba,
1187 	0x3504, 0xffffffff, 0x891,
1188 	0x3500, 0xffffffff, 0xbc,
1189 	0x3504, 0xffffffff, 0x893,
1190 	0x3500, 0xffffffff, 0xbe,
1191 	0x3504, 0xffffffff, 0x20895,
1192 	0x3500, 0xffffffff, 0xc2,
1193 	0x3504, 0xffffffff, 0x20899,
1194 	0x3500, 0xffffffff, 0xc6,
1195 	0x3504, 0xffffffff, 0x2089d,
1196 	0x3500, 0xffffffff, 0xca,
1197 	0x3504, 0xffffffff, 0x8a1,
1198 	0x3500, 0xffffffff, 0xcc,
1199 	0x3504, 0xffffffff, 0x8a3,
1200 	0x3500, 0xffffffff, 0xce,
1201 	0x3504, 0xffffffff, 0x308a5,
1202 	0x3500, 0xffffffff, 0xd3,
1203 	0x3504, 0xffffffff, 0x6d08cd,
1204 	0x3500, 0xffffffff, 0x142,
1205 	0x3504, 0xffffffff, 0x2000095a,
1206 	0x3504, 0xffffffff, 0x1,
1207 	0x3500, 0xffffffff, 0x144,
1208 	0x3504, 0xffffffff, 0x301f095b,
1209 	0x3500, 0xffffffff, 0x165,
1210 	0x3504, 0xffffffff, 0xc094d,
1211 	0x3500, 0xffffffff, 0x173,
1212 	0x3504, 0xffffffff, 0xf096d,
1213 	0x3500, 0xffffffff, 0x184,
1214 	0x3504, 0xffffffff, 0x15097f,
1215 	0x3500, 0xffffffff, 0x19b,
1216 	0x3504, 0xffffffff, 0xc0998,
1217 	0x3500, 0xffffffff, 0x1a9,
1218 	0x3504, 0xffffffff, 0x409a7,
1219 	0x3500, 0xffffffff, 0x1af,
1220 	0x3504, 0xffffffff, 0xcdc,
1221 	0x3500, 0xffffffff, 0x1b1,
1222 	0x3504, 0xffffffff, 0x800,
1223 	0x3508, 0xffffffff, 0x6c9b2000,
1224 	0x3510, 0xfc00, 0x2000,
1225 	0x3544, 0xffffffff, 0xfc0,
1226 	0x28d4, 0x00000100, 0x100
1227 };
1228 
1229 static void si_init_golden_registers(struct radeon_device *rdev)
1230 {
1231 	switch (rdev->family) {
1232 	case CHIP_TAHITI:
1233 		radeon_program_register_sequence(rdev,
1234 						 tahiti_golden_registers,
1235 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1236 		radeon_program_register_sequence(rdev,
1237 						 tahiti_golden_rlc_registers,
1238 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1239 		radeon_program_register_sequence(rdev,
1240 						 tahiti_mgcg_cgcg_init,
1241 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1242 		radeon_program_register_sequence(rdev,
1243 						 tahiti_golden_registers2,
1244 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1245 		break;
1246 	case CHIP_PITCAIRN:
1247 		radeon_program_register_sequence(rdev,
1248 						 pitcairn_golden_registers,
1249 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1250 		radeon_program_register_sequence(rdev,
1251 						 pitcairn_golden_rlc_registers,
1252 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1253 		radeon_program_register_sequence(rdev,
1254 						 pitcairn_mgcg_cgcg_init,
1255 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1256 		break;
1257 	case CHIP_VERDE:
1258 		radeon_program_register_sequence(rdev,
1259 						 verde_golden_registers,
1260 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1261 		radeon_program_register_sequence(rdev,
1262 						 verde_golden_rlc_registers,
1263 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1264 		radeon_program_register_sequence(rdev,
1265 						 verde_mgcg_cgcg_init,
1266 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1267 		radeon_program_register_sequence(rdev,
1268 						 verde_pg_init,
1269 						 (const u32)ARRAY_SIZE(verde_pg_init));
1270 		break;
1271 	case CHIP_OLAND:
1272 		radeon_program_register_sequence(rdev,
1273 						 oland_golden_registers,
1274 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1275 		radeon_program_register_sequence(rdev,
1276 						 oland_golden_rlc_registers,
1277 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1278 		radeon_program_register_sequence(rdev,
1279 						 oland_mgcg_cgcg_init,
1280 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1281 		break;
1282 	case CHIP_HAINAN:
1283 		radeon_program_register_sequence(rdev,
1284 						 hainan_golden_registers,
1285 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1286 		radeon_program_register_sequence(rdev,
1287 						 hainan_golden_registers2,
1288 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1289 		radeon_program_register_sequence(rdev,
1290 						 hainan_mgcg_cgcg_init,
1291 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1292 		break;
1293 	default:
1294 		break;
1295 	}
1296 }
1297 
1298 /**
1299  * si_get_allowed_info_register - fetch the register for the info ioctl
1300  *
1301  * @rdev: radeon_device pointer
1302  * @reg: register offset in bytes
1303  * @val: register value
1304  *
1305  * Returns 0 for success or -EINVAL for an invalid register
1306  *
1307  */
1308 int si_get_allowed_info_register(struct radeon_device *rdev,
1309 				 u32 reg, u32 *val)
1310 {
1311 	switch (reg) {
1312 	case GRBM_STATUS:
1313 	case GRBM_STATUS2:
1314 	case GRBM_STATUS_SE0:
1315 	case GRBM_STATUS_SE1:
1316 	case SRBM_STATUS:
1317 	case SRBM_STATUS2:
1318 	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1319 	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1320 	case UVD_STATUS:
1321 		*val = RREG32(reg);
1322 		return 0;
1323 	default:
1324 		return -EINVAL;
1325 	}
1326 }
1327 
1328 #define PCIE_BUS_CLK                10000
1329 #define TCLK                        (PCIE_BUS_CLK / 10)
1330 
1331 /**
1332  * si_get_xclk - get the xclk
1333  *
1334  * @rdev: radeon_device pointer
1335  *
1336  * Returns the reference clock used by the gfx engine
1337  * (SI).
1338  */
1339 u32 si_get_xclk(struct radeon_device *rdev)
1340 {
1341 	u32 reference_clock = rdev->clock.spll.reference_freq;
1342 	u32 tmp;
1343 
1344 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1345 	if (tmp & MUX_TCLK_TO_XCLK)
1346 		return TCLK;
1347 
1348 	tmp = RREG32(CG_CLKPIN_CNTL);
1349 	if (tmp & XTALIN_DIVIDE)
1350 		return reference_clock / 4;
1351 
1352 	return reference_clock;
1353 }
1354 
1355 /* get temperature in millidegrees */
1356 int si_get_temp(struct radeon_device *rdev)
1357 {
1358 	u32 temp;
1359 	int actual_temp = 0;
1360 
1361 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1362 		CTF_TEMP_SHIFT;
1363 
1364 	if (temp & 0x200)
1365 		actual_temp = 255;
1366 	else
1367 		actual_temp = temp & 0x1ff;
1368 
1369 	actual_temp = (actual_temp * 1000);
1370 
1371 	return actual_temp;
1372 }
1373 
1374 #define TAHITI_IO_MC_REGS_SIZE 36
1375 
1376 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1377 	{0x0000006f, 0x03044000},
1378 	{0x00000070, 0x0480c018},
1379 	{0x00000071, 0x00000040},
1380 	{0x00000072, 0x01000000},
1381 	{0x00000074, 0x000000ff},
1382 	{0x00000075, 0x00143400},
1383 	{0x00000076, 0x08ec0800},
1384 	{0x00000077, 0x040000cc},
1385 	{0x00000079, 0x00000000},
1386 	{0x0000007a, 0x21000409},
1387 	{0x0000007c, 0x00000000},
1388 	{0x0000007d, 0xe8000000},
1389 	{0x0000007e, 0x044408a8},
1390 	{0x0000007f, 0x00000003},
1391 	{0x00000080, 0x00000000},
1392 	{0x00000081, 0x01000000},
1393 	{0x00000082, 0x02000000},
1394 	{0x00000083, 0x00000000},
1395 	{0x00000084, 0xe3f3e4f4},
1396 	{0x00000085, 0x00052024},
1397 	{0x00000087, 0x00000000},
1398 	{0x00000088, 0x66036603},
1399 	{0x00000089, 0x01000000},
1400 	{0x0000008b, 0x1c0a0000},
1401 	{0x0000008c, 0xff010000},
1402 	{0x0000008e, 0xffffefff},
1403 	{0x0000008f, 0xfff3efff},
1404 	{0x00000090, 0xfff3efbf},
1405 	{0x00000094, 0x00101101},
1406 	{0x00000095, 0x00000fff},
1407 	{0x00000096, 0x00116fff},
1408 	{0x00000097, 0x60010000},
1409 	{0x00000098, 0x10010000},
1410 	{0x00000099, 0x00006000},
1411 	{0x0000009a, 0x00001000},
1412 	{0x0000009f, 0x00a77400}
1413 };
1414 
1415 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1416 	{0x0000006f, 0x03044000},
1417 	{0x00000070, 0x0480c018},
1418 	{0x00000071, 0x00000040},
1419 	{0x00000072, 0x01000000},
1420 	{0x00000074, 0x000000ff},
1421 	{0x00000075, 0x00143400},
1422 	{0x00000076, 0x08ec0800},
1423 	{0x00000077, 0x040000cc},
1424 	{0x00000079, 0x00000000},
1425 	{0x0000007a, 0x21000409},
1426 	{0x0000007c, 0x00000000},
1427 	{0x0000007d, 0xe8000000},
1428 	{0x0000007e, 0x044408a8},
1429 	{0x0000007f, 0x00000003},
1430 	{0x00000080, 0x00000000},
1431 	{0x00000081, 0x01000000},
1432 	{0x00000082, 0x02000000},
1433 	{0x00000083, 0x00000000},
1434 	{0x00000084, 0xe3f3e4f4},
1435 	{0x00000085, 0x00052024},
1436 	{0x00000087, 0x00000000},
1437 	{0x00000088, 0x66036603},
1438 	{0x00000089, 0x01000000},
1439 	{0x0000008b, 0x1c0a0000},
1440 	{0x0000008c, 0xff010000},
1441 	{0x0000008e, 0xffffefff},
1442 	{0x0000008f, 0xfff3efff},
1443 	{0x00000090, 0xfff3efbf},
1444 	{0x00000094, 0x00101101},
1445 	{0x00000095, 0x00000fff},
1446 	{0x00000096, 0x00116fff},
1447 	{0x00000097, 0x60010000},
1448 	{0x00000098, 0x10010000},
1449 	{0x00000099, 0x00006000},
1450 	{0x0000009a, 0x00001000},
1451 	{0x0000009f, 0x00a47400}
1452 };
1453 
1454 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1455 	{0x0000006f, 0x03044000},
1456 	{0x00000070, 0x0480c018},
1457 	{0x00000071, 0x00000040},
1458 	{0x00000072, 0x01000000},
1459 	{0x00000074, 0x000000ff},
1460 	{0x00000075, 0x00143400},
1461 	{0x00000076, 0x08ec0800},
1462 	{0x00000077, 0x040000cc},
1463 	{0x00000079, 0x00000000},
1464 	{0x0000007a, 0x21000409},
1465 	{0x0000007c, 0x00000000},
1466 	{0x0000007d, 0xe8000000},
1467 	{0x0000007e, 0x044408a8},
1468 	{0x0000007f, 0x00000003},
1469 	{0x00000080, 0x00000000},
1470 	{0x00000081, 0x01000000},
1471 	{0x00000082, 0x02000000},
1472 	{0x00000083, 0x00000000},
1473 	{0x00000084, 0xe3f3e4f4},
1474 	{0x00000085, 0x00052024},
1475 	{0x00000087, 0x00000000},
1476 	{0x00000088, 0x66036603},
1477 	{0x00000089, 0x01000000},
1478 	{0x0000008b, 0x1c0a0000},
1479 	{0x0000008c, 0xff010000},
1480 	{0x0000008e, 0xffffefff},
1481 	{0x0000008f, 0xfff3efff},
1482 	{0x00000090, 0xfff3efbf},
1483 	{0x00000094, 0x00101101},
1484 	{0x00000095, 0x00000fff},
1485 	{0x00000096, 0x00116fff},
1486 	{0x00000097, 0x60010000},
1487 	{0x00000098, 0x10010000},
1488 	{0x00000099, 0x00006000},
1489 	{0x0000009a, 0x00001000},
1490 	{0x0000009f, 0x00a37400}
1491 };
1492 
1493 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1494 	{0x0000006f, 0x03044000},
1495 	{0x00000070, 0x0480c018},
1496 	{0x00000071, 0x00000040},
1497 	{0x00000072, 0x01000000},
1498 	{0x00000074, 0x000000ff},
1499 	{0x00000075, 0x00143400},
1500 	{0x00000076, 0x08ec0800},
1501 	{0x00000077, 0x040000cc},
1502 	{0x00000079, 0x00000000},
1503 	{0x0000007a, 0x21000409},
1504 	{0x0000007c, 0x00000000},
1505 	{0x0000007d, 0xe8000000},
1506 	{0x0000007e, 0x044408a8},
1507 	{0x0000007f, 0x00000003},
1508 	{0x00000080, 0x00000000},
1509 	{0x00000081, 0x01000000},
1510 	{0x00000082, 0x02000000},
1511 	{0x00000083, 0x00000000},
1512 	{0x00000084, 0xe3f3e4f4},
1513 	{0x00000085, 0x00052024},
1514 	{0x00000087, 0x00000000},
1515 	{0x00000088, 0x66036603},
1516 	{0x00000089, 0x01000000},
1517 	{0x0000008b, 0x1c0a0000},
1518 	{0x0000008c, 0xff010000},
1519 	{0x0000008e, 0xffffefff},
1520 	{0x0000008f, 0xfff3efff},
1521 	{0x00000090, 0xfff3efbf},
1522 	{0x00000094, 0x00101101},
1523 	{0x00000095, 0x00000fff},
1524 	{0x00000096, 0x00116fff},
1525 	{0x00000097, 0x60010000},
1526 	{0x00000098, 0x10010000},
1527 	{0x00000099, 0x00006000},
1528 	{0x0000009a, 0x00001000},
1529 	{0x0000009f, 0x00a17730}
1530 };
1531 
1532 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1533 	{0x0000006f, 0x03044000},
1534 	{0x00000070, 0x0480c018},
1535 	{0x00000071, 0x00000040},
1536 	{0x00000072, 0x01000000},
1537 	{0x00000074, 0x000000ff},
1538 	{0x00000075, 0x00143400},
1539 	{0x00000076, 0x08ec0800},
1540 	{0x00000077, 0x040000cc},
1541 	{0x00000079, 0x00000000},
1542 	{0x0000007a, 0x21000409},
1543 	{0x0000007c, 0x00000000},
1544 	{0x0000007d, 0xe8000000},
1545 	{0x0000007e, 0x044408a8},
1546 	{0x0000007f, 0x00000003},
1547 	{0x00000080, 0x00000000},
1548 	{0x00000081, 0x01000000},
1549 	{0x00000082, 0x02000000},
1550 	{0x00000083, 0x00000000},
1551 	{0x00000084, 0xe3f3e4f4},
1552 	{0x00000085, 0x00052024},
1553 	{0x00000087, 0x00000000},
1554 	{0x00000088, 0x66036603},
1555 	{0x00000089, 0x01000000},
1556 	{0x0000008b, 0x1c0a0000},
1557 	{0x0000008c, 0xff010000},
1558 	{0x0000008e, 0xffffefff},
1559 	{0x0000008f, 0xfff3efff},
1560 	{0x00000090, 0xfff3efbf},
1561 	{0x00000094, 0x00101101},
1562 	{0x00000095, 0x00000fff},
1563 	{0x00000096, 0x00116fff},
1564 	{0x00000097, 0x60010000},
1565 	{0x00000098, 0x10010000},
1566 	{0x00000099, 0x00006000},
1567 	{0x0000009a, 0x00001000},
1568 	{0x0000009f, 0x00a07730}
1569 };
1570 
1571 /* ucode loading */
1572 int si_mc_load_microcode(struct radeon_device *rdev)
1573 {
1574 	const __be32 *fw_data = NULL;
1575 	const __le32 *new_fw_data = NULL;
1576 	u32 running;
1577 	u32 *io_mc_regs = NULL;
1578 	const __le32 *new_io_mc_regs = NULL;
1579 	int i, regs_size, ucode_size;
1580 
1581 	if (!rdev->mc_fw)
1582 		return -EINVAL;
1583 
1584 	if (rdev->new_fw) {
1585 		const struct mc_firmware_header_v1_0 *hdr =
1586 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1587 
1588 		radeon_ucode_print_mc_hdr(&hdr->header);
1589 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1590 		new_io_mc_regs = (const __le32 *)
1591 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1592 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1593 		new_fw_data = (const __le32 *)
1594 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1595 	} else {
1596 		ucode_size = rdev->mc_fw->size / 4;
1597 
1598 		switch (rdev->family) {
1599 		case CHIP_TAHITI:
1600 			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1601 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1602 			break;
1603 		case CHIP_PITCAIRN:
1604 			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1605 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1606 			break;
1607 		case CHIP_VERDE:
1608 		default:
1609 			io_mc_regs = (u32 *)&verde_io_mc_regs;
1610 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1611 			break;
1612 		case CHIP_OLAND:
1613 			io_mc_regs = (u32 *)&oland_io_mc_regs;
1614 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1615 			break;
1616 		case CHIP_HAINAN:
1617 			io_mc_regs = (u32 *)&hainan_io_mc_regs;
1618 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1619 			break;
1620 		}
1621 		fw_data = (const __be32 *)rdev->mc_fw->data;
1622 	}
1623 
1624 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1625 
1626 	if (running == 0) {
1627 		/* reset the engine and set to writable */
1628 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1629 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1630 
1631 		/* load mc io regs */
1632 		for (i = 0; i < regs_size; i++) {
1633 			if (rdev->new_fw) {
1634 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1635 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1636 			} else {
1637 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1638 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1639 			}
1640 		}
1641 		/* load the MC ucode */
1642 		for (i = 0; i < ucode_size; i++) {
1643 			if (rdev->new_fw)
1644 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1645 			else
1646 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1647 		}
1648 
1649 		/* put the engine back into the active state */
1650 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1651 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1652 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1653 
1654 		/* wait for training to complete */
1655 		for (i = 0; i < rdev->usec_timeout; i++) {
1656 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1657 				break;
1658 			udelay(1);
1659 		}
1660 		for (i = 0; i < rdev->usec_timeout; i++) {
1661 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1662 				break;
1663 			udelay(1);
1664 		}
1665 	}
1666 
1667 	return 0;
1668 }
1669 
1670 static int si_init_microcode(struct radeon_device *rdev)
1671 {
1672 	const char *chip_name;
1673 	const char *new_chip_name;
1674 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1675 	size_t smc_req_size, mc2_req_size;
1676 	char fw_name[30];
1677 	int err;
1678 	int new_fw = 0;
1679 	bool new_smc = false;
1680 	bool si58_fw = false;
1681 	bool banks2_fw = false;
1682 
1683 	DRM_DEBUG("\n");
1684 
1685 	switch (rdev->family) {
1686 	case CHIP_TAHITI:
1687 		chip_name = "TAHITI";
1688 		new_chip_name = "tahiti";
1689 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1690 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1691 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1692 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1693 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1694 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1695 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1696 		break;
1697 	case CHIP_PITCAIRN:
1698 		chip_name = "PITCAIRN";
1699 		if ((rdev->pdev->revision == 0x81) &&
1700 		    ((rdev->pdev->device == 0x6810) ||
1701 		     (rdev->pdev->device == 0x6811)))
1702 			new_smc = true;
1703 		new_chip_name = "pitcairn";
1704 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1705 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1706 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1707 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1708 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1709 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1710 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1711 		break;
1712 	case CHIP_VERDE:
1713 		chip_name = "VERDE";
1714 		if (((rdev->pdev->device == 0x6820) &&
1715 		     ((rdev->pdev->revision == 0x81) ||
1716 		      (rdev->pdev->revision == 0x83))) ||
1717 		    ((rdev->pdev->device == 0x6821) &&
1718 		     ((rdev->pdev->revision == 0x83) ||
1719 		      (rdev->pdev->revision == 0x87))) ||
1720 		    ((rdev->pdev->revision == 0x87) &&
1721 		     ((rdev->pdev->device == 0x6823) ||
1722 		      (rdev->pdev->device == 0x682b))))
1723 			new_smc = true;
1724 		new_chip_name = "verde";
1725 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1726 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1727 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1728 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1729 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1730 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1731 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1732 		break;
1733 	case CHIP_OLAND:
1734 		chip_name = "OLAND";
1735 		if (((rdev->pdev->revision == 0x81) &&
1736 		     ((rdev->pdev->device == 0x6600) ||
1737 		      (rdev->pdev->device == 0x6604) ||
1738 		      (rdev->pdev->device == 0x6605) ||
1739 		      (rdev->pdev->device == 0x6610))) ||
1740 		    ((rdev->pdev->revision == 0x83) &&
1741 		     (rdev->pdev->device == 0x6610)))
1742 			new_smc = true;
1743 		new_chip_name = "oland";
1744 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1745 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1746 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1747 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1748 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1749 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1750 		break;
1751 	case CHIP_HAINAN:
1752 		chip_name = "HAINAN";
1753 		if (((rdev->pdev->revision == 0x81) &&
1754 		     (rdev->pdev->device == 0x6660)) ||
1755 		    ((rdev->pdev->revision == 0x83) &&
1756 		     ((rdev->pdev->device == 0x6660) ||
1757 		      (rdev->pdev->device == 0x6663) ||
1758 		      (rdev->pdev->device == 0x6665) ||
1759 		      (rdev->pdev->device == 0x6667))))
1760 			new_smc = true;
1761 		else if ((rdev->pdev->revision == 0xc3) &&
1762 			 (rdev->pdev->device == 0x6665))
1763 			banks2_fw = true;
1764 		new_chip_name = "hainan";
1765 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1766 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1767 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1768 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1769 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1770 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1771 		break;
1772 	default: BUG();
1773 	}
1774 
1775 	/* this memory configuration requires special firmware */
1776 	if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1777 		si58_fw = true;
1778 
1779 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1780 
1781 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1782 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1783 	if (err) {
1784 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1785 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1786 		if (err)
1787 			goto out;
1788 		if (rdev->pfp_fw->size != pfp_req_size) {
1789 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1790 			       rdev->pfp_fw->size, fw_name);
1791 			err = -EINVAL;
1792 			goto out;
1793 		}
1794 	} else {
1795 		err = radeon_ucode_validate(rdev->pfp_fw);
1796 		if (err) {
1797 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1798 			       fw_name);
1799 			goto out;
1800 		} else {
1801 			new_fw++;
1802 		}
1803 	}
1804 
1805 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1806 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1807 	if (err) {
1808 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1809 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1810 		if (err)
1811 			goto out;
1812 		if (rdev->me_fw->size != me_req_size) {
1813 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1814 			       rdev->me_fw->size, fw_name);
1815 			err = -EINVAL;
1816 		}
1817 	} else {
1818 		err = radeon_ucode_validate(rdev->me_fw);
1819 		if (err) {
1820 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1821 			       fw_name);
1822 			goto out;
1823 		} else {
1824 			new_fw++;
1825 		}
1826 	}
1827 
1828 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1829 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1830 	if (err) {
1831 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1832 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1833 		if (err)
1834 			goto out;
1835 		if (rdev->ce_fw->size != ce_req_size) {
1836 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1837 			       rdev->ce_fw->size, fw_name);
1838 			err = -EINVAL;
1839 		}
1840 	} else {
1841 		err = radeon_ucode_validate(rdev->ce_fw);
1842 		if (err) {
1843 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1844 			       fw_name);
1845 			goto out;
1846 		} else {
1847 			new_fw++;
1848 		}
1849 	}
1850 
1851 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1852 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1853 	if (err) {
1854 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1855 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1856 		if (err)
1857 			goto out;
1858 		if (rdev->rlc_fw->size != rlc_req_size) {
1859 			pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1860 			       rdev->rlc_fw->size, fw_name);
1861 			err = -EINVAL;
1862 		}
1863 	} else {
1864 		err = radeon_ucode_validate(rdev->rlc_fw);
1865 		if (err) {
1866 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1867 			       fw_name);
1868 			goto out;
1869 		} else {
1870 			new_fw++;
1871 		}
1872 	}
1873 
1874 	if (si58_fw)
1875 		snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
1876 	else
1877 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1878 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1879 	if (err) {
1880 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1881 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1882 		if (err) {
1883 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1884 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1885 			if (err)
1886 				goto out;
1887 		}
1888 		if ((rdev->mc_fw->size != mc_req_size) &&
1889 		    (rdev->mc_fw->size != mc2_req_size)) {
1890 			pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1891 			       rdev->mc_fw->size, fw_name);
1892 			err = -EINVAL;
1893 		}
1894 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1895 	} else {
1896 		err = radeon_ucode_validate(rdev->mc_fw);
1897 		if (err) {
1898 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1899 			       fw_name);
1900 			goto out;
1901 		} else {
1902 			new_fw++;
1903 		}
1904 	}
1905 
1906 	if (banks2_fw)
1907 		snprintf(fw_name, sizeof(fw_name), "radeon/banks_k_2_smc.bin");
1908 	else if (new_smc)
1909 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1910 	else
1911 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1912 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1913 	if (err) {
1914 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1915 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1916 		if (err) {
1917 			pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1918 			release_firmware(rdev->smc_fw);
1919 			rdev->smc_fw = NULL;
1920 			err = 0;
1921 		} else if (rdev->smc_fw->size != smc_req_size) {
1922 			pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1923 			       rdev->smc_fw->size, fw_name);
1924 			err = -EINVAL;
1925 		}
1926 	} else {
1927 		err = radeon_ucode_validate(rdev->smc_fw);
1928 		if (err) {
1929 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1930 			       fw_name);
1931 			goto out;
1932 		} else {
1933 			new_fw++;
1934 		}
1935 	}
1936 
1937 	if (new_fw == 0) {
1938 		rdev->new_fw = false;
1939 	} else if (new_fw < 6) {
1940 		pr_err("si_fw: mixing new and old firmware!\n");
1941 		err = -EINVAL;
1942 	} else {
1943 		rdev->new_fw = true;
1944 	}
1945 out:
1946 	if (err) {
1947 		if (err != -EINVAL)
1948 			pr_err("si_cp: Failed to load firmware \"%s\"\n",
1949 			       fw_name);
1950 		release_firmware(rdev->pfp_fw);
1951 		rdev->pfp_fw = NULL;
1952 		release_firmware(rdev->me_fw);
1953 		rdev->me_fw = NULL;
1954 		release_firmware(rdev->ce_fw);
1955 		rdev->ce_fw = NULL;
1956 		release_firmware(rdev->rlc_fw);
1957 		rdev->rlc_fw = NULL;
1958 		release_firmware(rdev->mc_fw);
1959 		rdev->mc_fw = NULL;
1960 		release_firmware(rdev->smc_fw);
1961 		rdev->smc_fw = NULL;
1962 	}
1963 	return err;
1964 }
1965 
1966 /* watermark setup */
1967 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1968 				   struct radeon_crtc *radeon_crtc,
1969 				   struct drm_display_mode *mode,
1970 				   struct drm_display_mode *other_mode)
1971 {
1972 	u32 tmp, buffer_alloc, i;
1973 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1974 	/*
1975 	 * Line Buffer Setup
1976 	 * There are 3 line buffers, each one shared by 2 display controllers.
1977 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1978 	 * the display controllers.  The paritioning is done via one of four
1979 	 * preset allocations specified in bits 21:20:
1980 	 *  0 - half lb
1981 	 *  2 - whole lb, other crtc must be disabled
1982 	 */
1983 	/* this can get tricky if we have two large displays on a paired group
1984 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1985 	 * non-linked crtcs for maximum line buffer allocation.
1986 	 */
1987 	if (radeon_crtc->base.enabled && mode) {
1988 		if (other_mode) {
1989 			tmp = 0; /* 1/2 */
1990 			buffer_alloc = 1;
1991 		} else {
1992 			tmp = 2; /* whole */
1993 			buffer_alloc = 2;
1994 		}
1995 	} else {
1996 		tmp = 0;
1997 		buffer_alloc = 0;
1998 	}
1999 
2000 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
2001 	       DC_LB_MEMORY_CONFIG(tmp));
2002 
2003 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
2004 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
2005 	for (i = 0; i < rdev->usec_timeout; i++) {
2006 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
2007 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
2008 			break;
2009 		udelay(1);
2010 	}
2011 
2012 	if (radeon_crtc->base.enabled && mode) {
2013 		switch (tmp) {
2014 		case 0:
2015 		default:
2016 			return 4096 * 2;
2017 		case 2:
2018 			return 8192 * 2;
2019 		}
2020 	}
2021 
2022 	/* controller not enabled, so no lb used */
2023 	return 0;
2024 }
2025 
2026 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2027 {
2028 	u32 tmp = RREG32(MC_SHARED_CHMAP);
2029 
2030 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2031 	case 0:
2032 	default:
2033 		return 1;
2034 	case 1:
2035 		return 2;
2036 	case 2:
2037 		return 4;
2038 	case 3:
2039 		return 8;
2040 	case 4:
2041 		return 3;
2042 	case 5:
2043 		return 6;
2044 	case 6:
2045 		return 10;
2046 	case 7:
2047 		return 12;
2048 	case 8:
2049 		return 16;
2050 	}
2051 }
2052 
2053 struct dce6_wm_params {
2054 	u32 dram_channels; /* number of dram channels */
2055 	u32 yclk;          /* bandwidth per dram data pin in kHz */
2056 	u32 sclk;          /* engine clock in kHz */
2057 	u32 disp_clk;      /* display clock in kHz */
2058 	u32 src_width;     /* viewport width */
2059 	u32 active_time;   /* active display time in ns */
2060 	u32 blank_time;    /* blank time in ns */
2061 	bool interlaced;    /* mode is interlaced */
2062 	fixed20_12 vsc;    /* vertical scale ratio */
2063 	u32 num_heads;     /* number of active crtcs */
2064 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2065 	u32 lb_size;       /* line buffer allocated to pipe */
2066 	u32 vtaps;         /* vertical scaler taps */
2067 };
2068 
2069 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2070 {
2071 	/* Calculate raw DRAM Bandwidth */
2072 	fixed20_12 dram_efficiency; /* 0.7 */
2073 	fixed20_12 yclk, dram_channels, bandwidth;
2074 	fixed20_12 a;
2075 
2076 	a.full = dfixed_const(1000);
2077 	yclk.full = dfixed_const(wm->yclk);
2078 	yclk.full = dfixed_div(yclk, a);
2079 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2080 	a.full = dfixed_const(10);
2081 	dram_efficiency.full = dfixed_const(7);
2082 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2083 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2084 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2085 
2086 	return dfixed_trunc(bandwidth);
2087 }
2088 
2089 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2090 {
2091 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2092 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2093 	fixed20_12 yclk, dram_channels, bandwidth;
2094 	fixed20_12 a;
2095 
2096 	a.full = dfixed_const(1000);
2097 	yclk.full = dfixed_const(wm->yclk);
2098 	yclk.full = dfixed_div(yclk, a);
2099 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2100 	a.full = dfixed_const(10);
2101 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2102 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2103 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2104 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2105 
2106 	return dfixed_trunc(bandwidth);
2107 }
2108 
2109 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2110 {
2111 	/* Calculate the display Data return Bandwidth */
2112 	fixed20_12 return_efficiency; /* 0.8 */
2113 	fixed20_12 sclk, bandwidth;
2114 	fixed20_12 a;
2115 
2116 	a.full = dfixed_const(1000);
2117 	sclk.full = dfixed_const(wm->sclk);
2118 	sclk.full = dfixed_div(sclk, a);
2119 	a.full = dfixed_const(10);
2120 	return_efficiency.full = dfixed_const(8);
2121 	return_efficiency.full = dfixed_div(return_efficiency, a);
2122 	a.full = dfixed_const(32);
2123 	bandwidth.full = dfixed_mul(a, sclk);
2124 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2125 
2126 	return dfixed_trunc(bandwidth);
2127 }
2128 
2129 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2130 {
2131 	return 32;
2132 }
2133 
2134 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2135 {
2136 	/* Calculate the DMIF Request Bandwidth */
2137 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2138 	fixed20_12 disp_clk, sclk, bandwidth;
2139 	fixed20_12 a, b1, b2;
2140 	u32 min_bandwidth;
2141 
2142 	a.full = dfixed_const(1000);
2143 	disp_clk.full = dfixed_const(wm->disp_clk);
2144 	disp_clk.full = dfixed_div(disp_clk, a);
2145 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2146 	b1.full = dfixed_mul(a, disp_clk);
2147 
2148 	a.full = dfixed_const(1000);
2149 	sclk.full = dfixed_const(wm->sclk);
2150 	sclk.full = dfixed_div(sclk, a);
2151 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2152 	b2.full = dfixed_mul(a, sclk);
2153 
2154 	a.full = dfixed_const(10);
2155 	disp_clk_request_efficiency.full = dfixed_const(8);
2156 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2157 
2158 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2159 
2160 	a.full = dfixed_const(min_bandwidth);
2161 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2162 
2163 	return dfixed_trunc(bandwidth);
2164 }
2165 
2166 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2167 {
2168 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2169 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2170 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2171 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2172 
2173 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2174 }
2175 
2176 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2177 {
2178 	/* Calculate the display mode Average Bandwidth
2179 	 * DisplayMode should contain the source and destination dimensions,
2180 	 * timing, etc.
2181 	 */
2182 	fixed20_12 bpp;
2183 	fixed20_12 line_time;
2184 	fixed20_12 src_width;
2185 	fixed20_12 bandwidth;
2186 	fixed20_12 a;
2187 
2188 	a.full = dfixed_const(1000);
2189 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2190 	line_time.full = dfixed_div(line_time, a);
2191 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2192 	src_width.full = dfixed_const(wm->src_width);
2193 	bandwidth.full = dfixed_mul(src_width, bpp);
2194 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2195 	bandwidth.full = dfixed_div(bandwidth, line_time);
2196 
2197 	return dfixed_trunc(bandwidth);
2198 }
2199 
2200 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2201 {
2202 	/* First calcualte the latency in ns */
2203 	u32 mc_latency = 2000; /* 2000 ns. */
2204 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2205 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2206 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2207 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2208 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2209 		(wm->num_heads * cursor_line_pair_return_time);
2210 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2211 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2212 	u32 tmp, dmif_size = 12288;
2213 	fixed20_12 a, b, c;
2214 
2215 	if (wm->num_heads == 0)
2216 		return 0;
2217 
2218 	a.full = dfixed_const(2);
2219 	b.full = dfixed_const(1);
2220 	if ((wm->vsc.full > a.full) ||
2221 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2222 	    (wm->vtaps >= 5) ||
2223 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2224 		max_src_lines_per_dst_line = 4;
2225 	else
2226 		max_src_lines_per_dst_line = 2;
2227 
2228 	a.full = dfixed_const(available_bandwidth);
2229 	b.full = dfixed_const(wm->num_heads);
2230 	a.full = dfixed_div(a, b);
2231 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
2232 	tmp = min(dfixed_trunc(a), tmp);
2233 
2234 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
2235 
2236 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2237 	b.full = dfixed_const(1000);
2238 	c.full = dfixed_const(lb_fill_bw);
2239 	b.full = dfixed_div(c, b);
2240 	a.full = dfixed_div(a, b);
2241 	line_fill_time = dfixed_trunc(a);
2242 
2243 	if (line_fill_time < wm->active_time)
2244 		return latency;
2245 	else
2246 		return latency + (line_fill_time - wm->active_time);
2247 
2248 }
2249 
2250 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2251 {
2252 	if (dce6_average_bandwidth(wm) <=
2253 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2254 		return true;
2255 	else
2256 		return false;
2257 };
2258 
2259 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2260 {
2261 	if (dce6_average_bandwidth(wm) <=
2262 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2263 		return true;
2264 	else
2265 		return false;
2266 };
2267 
2268 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2269 {
2270 	u32 lb_partitions = wm->lb_size / wm->src_width;
2271 	u32 line_time = wm->active_time + wm->blank_time;
2272 	u32 latency_tolerant_lines;
2273 	u32 latency_hiding;
2274 	fixed20_12 a;
2275 
2276 	a.full = dfixed_const(1);
2277 	if (wm->vsc.full > a.full)
2278 		latency_tolerant_lines = 1;
2279 	else {
2280 		if (lb_partitions <= (wm->vtaps + 1))
2281 			latency_tolerant_lines = 1;
2282 		else
2283 			latency_tolerant_lines = 2;
2284 	}
2285 
2286 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2287 
2288 	if (dce6_latency_watermark(wm) <= latency_hiding)
2289 		return true;
2290 	else
2291 		return false;
2292 }
2293 
2294 static void dce6_program_watermarks(struct radeon_device *rdev,
2295 					 struct radeon_crtc *radeon_crtc,
2296 					 u32 lb_size, u32 num_heads)
2297 {
2298 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2299 	struct dce6_wm_params wm_low, wm_high;
2300 	u32 dram_channels;
2301 	u32 active_time;
2302 	u32 line_time = 0;
2303 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2304 	u32 priority_a_mark = 0, priority_b_mark = 0;
2305 	u32 priority_a_cnt = PRIORITY_OFF;
2306 	u32 priority_b_cnt = PRIORITY_OFF;
2307 	u32 tmp, arb_control3;
2308 	fixed20_12 a, b, c;
2309 
2310 	if (radeon_crtc->base.enabled && num_heads && mode) {
2311 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
2312 					    (u32)mode->clock);
2313 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
2314 					  (u32)mode->clock);
2315 		line_time = min(line_time, (u32)65535);
2316 		priority_a_cnt = 0;
2317 		priority_b_cnt = 0;
2318 
2319 		if (rdev->family == CHIP_ARUBA)
2320 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2321 		else
2322 			dram_channels = si_get_number_of_dram_channels(rdev);
2323 
2324 		/* watermark for high clocks */
2325 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2326 			wm_high.yclk =
2327 				radeon_dpm_get_mclk(rdev, false) * 10;
2328 			wm_high.sclk =
2329 				radeon_dpm_get_sclk(rdev, false) * 10;
2330 		} else {
2331 			wm_high.yclk = rdev->pm.current_mclk * 10;
2332 			wm_high.sclk = rdev->pm.current_sclk * 10;
2333 		}
2334 
2335 		wm_high.disp_clk = mode->clock;
2336 		wm_high.src_width = mode->crtc_hdisplay;
2337 		wm_high.active_time = active_time;
2338 		wm_high.blank_time = line_time - wm_high.active_time;
2339 		wm_high.interlaced = false;
2340 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2341 			wm_high.interlaced = true;
2342 		wm_high.vsc = radeon_crtc->vsc;
2343 		wm_high.vtaps = 1;
2344 		if (radeon_crtc->rmx_type != RMX_OFF)
2345 			wm_high.vtaps = 2;
2346 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2347 		wm_high.lb_size = lb_size;
2348 		wm_high.dram_channels = dram_channels;
2349 		wm_high.num_heads = num_heads;
2350 
2351 		/* watermark for low clocks */
2352 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2353 			wm_low.yclk =
2354 				radeon_dpm_get_mclk(rdev, true) * 10;
2355 			wm_low.sclk =
2356 				radeon_dpm_get_sclk(rdev, true) * 10;
2357 		} else {
2358 			wm_low.yclk = rdev->pm.current_mclk * 10;
2359 			wm_low.sclk = rdev->pm.current_sclk * 10;
2360 		}
2361 
2362 		wm_low.disp_clk = mode->clock;
2363 		wm_low.src_width = mode->crtc_hdisplay;
2364 		wm_low.active_time = active_time;
2365 		wm_low.blank_time = line_time - wm_low.active_time;
2366 		wm_low.interlaced = false;
2367 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2368 			wm_low.interlaced = true;
2369 		wm_low.vsc = radeon_crtc->vsc;
2370 		wm_low.vtaps = 1;
2371 		if (radeon_crtc->rmx_type != RMX_OFF)
2372 			wm_low.vtaps = 2;
2373 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2374 		wm_low.lb_size = lb_size;
2375 		wm_low.dram_channels = dram_channels;
2376 		wm_low.num_heads = num_heads;
2377 
2378 		/* set for high clocks */
2379 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2380 		/* set for low clocks */
2381 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2382 
2383 		/* possibly force display priority to high */
2384 		/* should really do this at mode validation time... */
2385 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2386 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2387 		    !dce6_check_latency_hiding(&wm_high) ||
2388 		    (rdev->disp_priority == 2)) {
2389 			DRM_DEBUG_KMS("force priority to high\n");
2390 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2391 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2392 		}
2393 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2394 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2395 		    !dce6_check_latency_hiding(&wm_low) ||
2396 		    (rdev->disp_priority == 2)) {
2397 			DRM_DEBUG_KMS("force priority to high\n");
2398 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2399 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2400 		}
2401 
2402 		a.full = dfixed_const(1000);
2403 		b.full = dfixed_const(mode->clock);
2404 		b.full = dfixed_div(b, a);
2405 		c.full = dfixed_const(latency_watermark_a);
2406 		c.full = dfixed_mul(c, b);
2407 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2408 		c.full = dfixed_div(c, a);
2409 		a.full = dfixed_const(16);
2410 		c.full = dfixed_div(c, a);
2411 		priority_a_mark = dfixed_trunc(c);
2412 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2413 
2414 		a.full = dfixed_const(1000);
2415 		b.full = dfixed_const(mode->clock);
2416 		b.full = dfixed_div(b, a);
2417 		c.full = dfixed_const(latency_watermark_b);
2418 		c.full = dfixed_mul(c, b);
2419 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2420 		c.full = dfixed_div(c, a);
2421 		a.full = dfixed_const(16);
2422 		c.full = dfixed_div(c, a);
2423 		priority_b_mark = dfixed_trunc(c);
2424 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2425 
2426 		/* Save number of lines the linebuffer leads before the scanout */
2427 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2428 	}
2429 
2430 	/* select wm A */
2431 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2432 	tmp = arb_control3;
2433 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2434 	tmp |= LATENCY_WATERMARK_MASK(1);
2435 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2436 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2437 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2438 		LATENCY_HIGH_WATERMARK(line_time)));
2439 	/* select wm B */
2440 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2441 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2442 	tmp |= LATENCY_WATERMARK_MASK(2);
2443 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2444 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2445 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2446 		LATENCY_HIGH_WATERMARK(line_time)));
2447 	/* restore original selection */
2448 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2449 
2450 	/* write the priority marks */
2451 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2452 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2453 
2454 	/* save values for DPM */
2455 	radeon_crtc->line_time = line_time;
2456 	radeon_crtc->wm_high = latency_watermark_a;
2457 	radeon_crtc->wm_low = latency_watermark_b;
2458 }
2459 
2460 void dce6_bandwidth_update(struct radeon_device *rdev)
2461 {
2462 	struct drm_display_mode *mode0 = NULL;
2463 	struct drm_display_mode *mode1 = NULL;
2464 	u32 num_heads = 0, lb_size;
2465 	int i;
2466 
2467 	if (!rdev->mode_info.mode_config_initialized)
2468 		return;
2469 
2470 	radeon_update_display_priority(rdev);
2471 
2472 	for (i = 0; i < rdev->num_crtc; i++) {
2473 		if (rdev->mode_info.crtcs[i]->base.enabled)
2474 			num_heads++;
2475 	}
2476 	for (i = 0; i < rdev->num_crtc; i += 2) {
2477 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2478 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2479 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2480 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2481 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2482 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2483 	}
2484 }
2485 
2486 /*
2487  * Core functions
2488  */
2489 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2490 {
2491 	u32 *tile = rdev->config.si.tile_mode_array;
2492 	const u32 num_tile_mode_states =
2493 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2494 	u32 reg_offset, split_equal_to_row_size;
2495 
2496 	switch (rdev->config.si.mem_row_size_in_kb) {
2497 	case 1:
2498 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2499 		break;
2500 	case 2:
2501 	default:
2502 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2503 		break;
2504 	case 4:
2505 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2506 		break;
2507 	}
2508 
2509 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2510 		tile[reg_offset] = 0;
2511 
2512 	switch(rdev->family) {
2513 	case CHIP_TAHITI:
2514 	case CHIP_PITCAIRN:
2515 		/* non-AA compressed depth or any compressed stencil */
2516 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2518 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2519 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2520 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2521 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2523 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2524 		/* 2xAA/4xAA compressed depth only */
2525 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2527 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2528 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2529 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2530 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2532 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2533 		/* 8xAA compressed depth only */
2534 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2535 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2536 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2537 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2538 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2539 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2542 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2543 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2544 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2545 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2546 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2547 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2548 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2550 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2551 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2552 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2553 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2554 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2555 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2556 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2557 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2559 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2560 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2561 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2563 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2564 			   TILE_SPLIT(split_equal_to_row_size) |
2565 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2566 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2568 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2569 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2570 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2571 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2572 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2573 			   TILE_SPLIT(split_equal_to_row_size) |
2574 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2575 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2577 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2578 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2579 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2581 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2582 			   TILE_SPLIT(split_equal_to_row_size) |
2583 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2584 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2585 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2586 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2587 		/* 1D and 1D Array Surfaces */
2588 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2589 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2590 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2591 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2592 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2593 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2595 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2596 		/* Displayable maps. */
2597 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2598 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2599 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2600 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2601 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2602 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2603 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2604 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2605 		/* Display 8bpp. */
2606 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2607 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2608 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2609 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2610 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2611 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2613 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2614 		/* Display 16bpp. */
2615 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2617 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2618 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2619 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2620 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2622 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2623 		/* Display 32bpp. */
2624 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2626 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2627 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2628 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2629 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2630 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2631 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2632 		/* Thin. */
2633 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2634 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2635 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2636 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2637 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2638 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2640 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2641 		/* Thin 8 bpp. */
2642 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2643 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2644 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2645 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2646 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2647 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2649 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2650 		/* Thin 16 bpp. */
2651 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2653 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2654 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2655 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2656 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2658 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2659 		/* Thin 32 bpp. */
2660 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2662 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2663 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2664 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2665 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2666 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2667 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2668 		/* Thin 64 bpp. */
2669 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2671 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2672 			   TILE_SPLIT(split_equal_to_row_size) |
2673 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2674 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2675 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2676 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2677 		/* 8 bpp PRT. */
2678 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2679 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2680 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2681 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2682 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2683 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2684 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2685 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2686 		/* 16 bpp PRT */
2687 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2688 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2689 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2690 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2691 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2692 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2693 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2694 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2695 		/* 32 bpp PRT */
2696 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2697 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2698 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2699 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2700 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2701 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2702 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2703 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2704 		/* 64 bpp PRT */
2705 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2707 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2708 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2709 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2710 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2711 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2712 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2713 		/* 128 bpp PRT */
2714 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2715 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2716 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2717 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2718 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2719 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2721 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2722 
2723 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2724 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2725 		break;
2726 
2727 	case CHIP_VERDE:
2728 	case CHIP_OLAND:
2729 	case CHIP_HAINAN:
2730 		/* non-AA compressed depth or any compressed stencil */
2731 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2732 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2733 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2734 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2735 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2736 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2738 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2739 		/* 2xAA/4xAA compressed depth only */
2740 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2741 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2742 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2744 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2745 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2747 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2748 		/* 8xAA compressed depth only */
2749 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2751 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2753 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2754 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2755 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2756 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2757 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2758 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2759 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2760 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2761 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2762 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2763 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2765 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2766 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2767 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2768 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2769 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2770 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2771 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2772 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2774 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2775 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2776 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2777 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2778 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2779 			   TILE_SPLIT(split_equal_to_row_size) |
2780 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2781 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2782 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2783 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2784 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2785 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2786 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2787 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2788 			   TILE_SPLIT(split_equal_to_row_size) |
2789 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2790 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2792 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2793 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2794 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2795 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2796 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2797 			   TILE_SPLIT(split_equal_to_row_size) |
2798 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2799 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2800 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2801 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2802 		/* 1D and 1D Array Surfaces */
2803 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2804 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2805 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2807 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2808 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2810 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2811 		/* Displayable maps. */
2812 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2813 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2814 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2815 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2816 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2817 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2819 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2820 		/* Display 8bpp. */
2821 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2823 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2824 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2825 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2826 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2828 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2829 		/* Display 16bpp. */
2830 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2831 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2832 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2833 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2834 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2835 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2836 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2837 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2838 		/* Display 32bpp. */
2839 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2840 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2841 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2842 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2843 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2844 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2847 		/* Thin. */
2848 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2849 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2850 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2851 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2852 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2853 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2855 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2856 		/* Thin 8 bpp. */
2857 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2858 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2859 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2860 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2861 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2862 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2863 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2864 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2865 		/* Thin 16 bpp. */
2866 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2867 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2868 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2869 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2870 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2871 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2872 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2873 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2874 		/* Thin 32 bpp. */
2875 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2877 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2878 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2879 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2880 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2883 		/* Thin 64 bpp. */
2884 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2886 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2887 			   TILE_SPLIT(split_equal_to_row_size) |
2888 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2889 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2890 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2891 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2892 		/* 8 bpp PRT. */
2893 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2895 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2896 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2897 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2898 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2899 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2900 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2901 		/* 16 bpp PRT */
2902 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2903 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2904 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2905 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2906 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2907 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2908 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2909 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2910 		/* 32 bpp PRT */
2911 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2912 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2913 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2914 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2915 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2916 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2917 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2918 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2919 		/* 64 bpp PRT */
2920 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2921 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2922 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2923 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2924 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2925 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2926 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2927 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2928 		/* 128 bpp PRT */
2929 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2930 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2931 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2932 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2933 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2934 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2935 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2936 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2937 
2938 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2939 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2940 		break;
2941 
2942 	default:
2943 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2944 	}
2945 }
2946 
2947 static void si_select_se_sh(struct radeon_device *rdev,
2948 			    u32 se_num, u32 sh_num)
2949 {
2950 	u32 data = INSTANCE_BROADCAST_WRITES;
2951 
2952 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2953 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2954 	else if (se_num == 0xffffffff)
2955 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2956 	else if (sh_num == 0xffffffff)
2957 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2958 	else
2959 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2960 	WREG32(GRBM_GFX_INDEX, data);
2961 }
2962 
/**
 * si_create_bitmask - build a mask with the low @bit_width bits set
 * @bit_width: number of low-order bits to set
 *
 * Returns a u32 with bits [bit_width-1:0] set.  A width of 0 yields 0 and
 * any width of 32 or more yields 0xffffffff.
 */
static u32 si_create_bitmask(u32 bit_width)
{
	/* A shift by the full type width is undefined, so saturate first. */
	if (bit_width >= 32)
		return 0xffffffff;

	return (1U << bit_width) - 1;
}
2973 
2974 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2975 {
2976 	u32 data, mask;
2977 
2978 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2979 	if (data & 1)
2980 		data &= INACTIVE_CUS_MASK;
2981 	else
2982 		data = 0;
2983 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2984 
2985 	data >>= INACTIVE_CUS_SHIFT;
2986 
2987 	mask = si_create_bitmask(cu_per_sh);
2988 
2989 	return ~data & mask;
2990 }
2991 
2992 static void si_setup_spi(struct radeon_device *rdev,
2993 			 u32 se_num, u32 sh_per_se,
2994 			 u32 cu_per_sh)
2995 {
2996 	int i, j, k;
2997 	u32 data, mask, active_cu;
2998 
2999 	for (i = 0; i < se_num; i++) {
3000 		for (j = 0; j < sh_per_se; j++) {
3001 			si_select_se_sh(rdev, i, j);
3002 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
3003 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
3004 
3005 			mask = 1;
3006 			for (k = 0; k < 16; k++) {
3007 				mask <<= k;
3008 				if (active_cu & mask) {
3009 					data &= ~mask;
3010 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3011 					break;
3012 				}
3013 			}
3014 		}
3015 	}
3016 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3017 }
3018 
3019 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3020 			      u32 max_rb_num_per_se,
3021 			      u32 sh_per_se)
3022 {
3023 	u32 data, mask;
3024 
3025 	data = RREG32(CC_RB_BACKEND_DISABLE);
3026 	if (data & 1)
3027 		data &= BACKEND_DISABLE_MASK;
3028 	else
3029 		data = 0;
3030 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3031 
3032 	data >>= BACKEND_DISABLE_SHIFT;
3033 
3034 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3035 
3036 	return data & mask;
3037 }
3038 
3039 static void si_setup_rb(struct radeon_device *rdev,
3040 			u32 se_num, u32 sh_per_se,
3041 			u32 max_rb_num_per_se)
3042 {
3043 	int i, j;
3044 	u32 data, mask;
3045 	u32 disabled_rbs = 0;
3046 	u32 enabled_rbs = 0;
3047 
3048 	for (i = 0; i < se_num; i++) {
3049 		for (j = 0; j < sh_per_se; j++) {
3050 			si_select_se_sh(rdev, i, j);
3051 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3052 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3053 		}
3054 	}
3055 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3056 
3057 	mask = 1;
3058 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3059 		if (!(disabled_rbs & mask))
3060 			enabled_rbs |= mask;
3061 		mask <<= 1;
3062 	}
3063 
3064 	rdev->config.si.backend_enable_mask = enabled_rbs;
3065 
3066 	for (i = 0; i < se_num; i++) {
3067 		si_select_se_sh(rdev, i, 0xffffffff);
3068 		data = 0;
3069 		for (j = 0; j < sh_per_se; j++) {
3070 			switch (enabled_rbs & 3) {
3071 			case 1:
3072 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3073 				break;
3074 			case 2:
3075 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3076 				break;
3077 			case 3:
3078 			default:
3079 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3080 				break;
3081 			}
3082 			enabled_rbs >>= 2;
3083 		}
3084 		WREG32(PA_SC_RASTER_CONFIG, data);
3085 	}
3086 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3087 }
3088 
3089 static void si_gpu_init(struct radeon_device *rdev)
3090 {
3091 	u32 gb_addr_config = 0;
3092 	u32 mc_shared_chmap, mc_arb_ramcfg;
3093 	u32 sx_debug_1;
3094 	u32 hdp_host_path_cntl;
3095 	u32 tmp;
3096 	int i, j;
3097 
3098 	switch (rdev->family) {
3099 	case CHIP_TAHITI:
3100 		rdev->config.si.max_shader_engines = 2;
3101 		rdev->config.si.max_tile_pipes = 12;
3102 		rdev->config.si.max_cu_per_sh = 8;
3103 		rdev->config.si.max_sh_per_se = 2;
3104 		rdev->config.si.max_backends_per_se = 4;
3105 		rdev->config.si.max_texture_channel_caches = 12;
3106 		rdev->config.si.max_gprs = 256;
3107 		rdev->config.si.max_gs_threads = 32;
3108 		rdev->config.si.max_hw_contexts = 8;
3109 
3110 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3111 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3112 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3113 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3114 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3115 		break;
3116 	case CHIP_PITCAIRN:
3117 		rdev->config.si.max_shader_engines = 2;
3118 		rdev->config.si.max_tile_pipes = 8;
3119 		rdev->config.si.max_cu_per_sh = 5;
3120 		rdev->config.si.max_sh_per_se = 2;
3121 		rdev->config.si.max_backends_per_se = 4;
3122 		rdev->config.si.max_texture_channel_caches = 8;
3123 		rdev->config.si.max_gprs = 256;
3124 		rdev->config.si.max_gs_threads = 32;
3125 		rdev->config.si.max_hw_contexts = 8;
3126 
3127 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3128 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3129 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3130 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3131 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3132 		break;
3133 	case CHIP_VERDE:
3134 	default:
3135 		rdev->config.si.max_shader_engines = 1;
3136 		rdev->config.si.max_tile_pipes = 4;
3137 		rdev->config.si.max_cu_per_sh = 5;
3138 		rdev->config.si.max_sh_per_se = 2;
3139 		rdev->config.si.max_backends_per_se = 4;
3140 		rdev->config.si.max_texture_channel_caches = 4;
3141 		rdev->config.si.max_gprs = 256;
3142 		rdev->config.si.max_gs_threads = 32;
3143 		rdev->config.si.max_hw_contexts = 8;
3144 
3145 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3146 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3147 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3148 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3149 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3150 		break;
3151 	case CHIP_OLAND:
3152 		rdev->config.si.max_shader_engines = 1;
3153 		rdev->config.si.max_tile_pipes = 4;
3154 		rdev->config.si.max_cu_per_sh = 6;
3155 		rdev->config.si.max_sh_per_se = 1;
3156 		rdev->config.si.max_backends_per_se = 2;
3157 		rdev->config.si.max_texture_channel_caches = 4;
3158 		rdev->config.si.max_gprs = 256;
3159 		rdev->config.si.max_gs_threads = 16;
3160 		rdev->config.si.max_hw_contexts = 8;
3161 
3162 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3163 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3164 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3165 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3166 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3167 		break;
3168 	case CHIP_HAINAN:
3169 		rdev->config.si.max_shader_engines = 1;
3170 		rdev->config.si.max_tile_pipes = 4;
3171 		rdev->config.si.max_cu_per_sh = 5;
3172 		rdev->config.si.max_sh_per_se = 1;
3173 		rdev->config.si.max_backends_per_se = 1;
3174 		rdev->config.si.max_texture_channel_caches = 2;
3175 		rdev->config.si.max_gprs = 256;
3176 		rdev->config.si.max_gs_threads = 16;
3177 		rdev->config.si.max_hw_contexts = 8;
3178 
3179 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3180 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3181 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3182 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3183 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3184 		break;
3185 	}
3186 
3187 	/* Initialize HDP */
3188 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3189 		WREG32((0x2c14 + j), 0x00000000);
3190 		WREG32((0x2c18 + j), 0x00000000);
3191 		WREG32((0x2c1c + j), 0x00000000);
3192 		WREG32((0x2c20 + j), 0x00000000);
3193 		WREG32((0x2c24 + j), 0x00000000);
3194 	}
3195 
3196 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3197 	WREG32(SRBM_INT_CNTL, 1);
3198 	WREG32(SRBM_INT_ACK, 1);
3199 
3200 	evergreen_fix_pci_max_read_req_size(rdev);
3201 
3202 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3203 
3204 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3205 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3206 
3207 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3208 	rdev->config.si.mem_max_burst_length_bytes = 256;
3209 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3210 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3211 	if (rdev->config.si.mem_row_size_in_kb > 4)
3212 		rdev->config.si.mem_row_size_in_kb = 4;
3213 	/* XXX use MC settings? */
3214 	rdev->config.si.shader_engine_tile_size = 32;
3215 	rdev->config.si.num_gpus = 1;
3216 	rdev->config.si.multi_gpu_tile_size = 64;
3217 
3218 	/* fix up row size */
3219 	gb_addr_config &= ~ROW_SIZE_MASK;
3220 	switch (rdev->config.si.mem_row_size_in_kb) {
3221 	case 1:
3222 	default:
3223 		gb_addr_config |= ROW_SIZE(0);
3224 		break;
3225 	case 2:
3226 		gb_addr_config |= ROW_SIZE(1);
3227 		break;
3228 	case 4:
3229 		gb_addr_config |= ROW_SIZE(2);
3230 		break;
3231 	}
3232 
3233 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3234 	 * not have bank info, so create a custom tiling dword.
3235 	 * bits 3:0   num_pipes
3236 	 * bits 7:4   num_banks
3237 	 * bits 11:8  group_size
3238 	 * bits 15:12 row_size
3239 	 */
3240 	rdev->config.si.tile_config = 0;
3241 	switch (rdev->config.si.num_tile_pipes) {
3242 	case 1:
3243 		rdev->config.si.tile_config |= (0 << 0);
3244 		break;
3245 	case 2:
3246 		rdev->config.si.tile_config |= (1 << 0);
3247 		break;
3248 	case 4:
3249 		rdev->config.si.tile_config |= (2 << 0);
3250 		break;
3251 	case 8:
3252 	default:
3253 		/* XXX what about 12? */
3254 		rdev->config.si.tile_config |= (3 << 0);
3255 		break;
3256 	}
3257 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3258 	case 0: /* four banks */
3259 		rdev->config.si.tile_config |= 0 << 4;
3260 		break;
3261 	case 1: /* eight banks */
3262 		rdev->config.si.tile_config |= 1 << 4;
3263 		break;
3264 	case 2: /* sixteen banks */
3265 	default:
3266 		rdev->config.si.tile_config |= 2 << 4;
3267 		break;
3268 	}
3269 	rdev->config.si.tile_config |=
3270 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3271 	rdev->config.si.tile_config |=
3272 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3273 
3274 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3275 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3276 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3277 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3278 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3279 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3280 	if (rdev->has_uvd) {
3281 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3282 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3283 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3284 	}
3285 
3286 	si_tiling_mode_table_init(rdev);
3287 
3288 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3289 		    rdev->config.si.max_sh_per_se,
3290 		    rdev->config.si.max_backends_per_se);
3291 
3292 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3293 		     rdev->config.si.max_sh_per_se,
3294 		     rdev->config.si.max_cu_per_sh);
3295 
3296 	rdev->config.si.active_cus = 0;
3297 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3298 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3299 			rdev->config.si.active_cus +=
3300 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3301 		}
3302 	}
3303 
3304 	/* set HW defaults for 3D engine */
3305 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3306 				     ROQ_IB2_START(0x2b)));
3307 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3308 
3309 	sx_debug_1 = RREG32(SX_DEBUG_1);
3310 	WREG32(SX_DEBUG_1, sx_debug_1);
3311 
3312 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3313 
3314 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3315 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3316 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3317 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3318 
3319 	WREG32(VGT_NUM_INSTANCES, 1);
3320 
3321 	WREG32(CP_PERFMON_CNTL, 0);
3322 
3323 	WREG32(SQ_CONFIG, 0);
3324 
3325 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3326 					  FORCE_EOV_MAX_REZ_CNT(255)));
3327 
3328 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3329 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3330 
3331 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3332 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3333 
3334 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3335 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3336 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3337 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3338 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3339 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3340 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3341 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3342 
3343 	tmp = RREG32(HDP_MISC_CNTL);
3344 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3345 	WREG32(HDP_MISC_CNTL, tmp);
3346 
3347 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3348 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3349 
3350 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3351 
3352 	udelay(50);
3353 }
3354 
/*
 * GPU scratch register helper functions.
 */
3358 static void si_scratch_init(struct radeon_device *rdev)
3359 {
3360 	int i;
3361 
3362 	rdev->scratch.num_reg = 7;
3363 	rdev->scratch.reg_base = SCRATCH_REG0;
3364 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3365 		rdev->scratch.free[i] = true;
3366 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3367 	}
3368 }
3369 
3370 void si_fence_ring_emit(struct radeon_device *rdev,
3371 			struct radeon_fence *fence)
3372 {
3373 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3374 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3375 
3376 	/* flush read cache over gart */
3377 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3378 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3379 	radeon_ring_write(ring, 0);
3380 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3381 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3382 			  PACKET3_TC_ACTION_ENA |
3383 			  PACKET3_SH_KCACHE_ACTION_ENA |
3384 			  PACKET3_SH_ICACHE_ACTION_ENA);
3385 	radeon_ring_write(ring, 0xFFFFFFFF);
3386 	radeon_ring_write(ring, 0);
3387 	radeon_ring_write(ring, 10); /* poll interval */
3388 	/* EVENT_WRITE_EOP - flush caches, send int */
3389 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3390 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3391 	radeon_ring_write(ring, lower_32_bits(addr));
3392 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3393 	radeon_ring_write(ring, fence->seq);
3394 	radeon_ring_write(ring, 0);
3395 }
3396 
3397 /*
3398  * IB stuff
3399  */
3400 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3401 {
3402 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3403 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3404 	u32 header;
3405 
3406 	if (ib->is_const_ib) {
3407 		/* set switch buffer packet before const IB */
3408 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3409 		radeon_ring_write(ring, 0);
3410 
3411 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3412 	} else {
3413 		u32 next_rptr;
3414 		if (ring->rptr_save_reg) {
3415 			next_rptr = ring->wptr + 3 + 4 + 8;
3416 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3417 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3418 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3419 			radeon_ring_write(ring, next_rptr);
3420 		} else if (rdev->wb.enabled) {
3421 			next_rptr = ring->wptr + 5 + 4 + 8;
3422 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3423 			radeon_ring_write(ring, (1 << 8));
3424 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3425 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3426 			radeon_ring_write(ring, next_rptr);
3427 		}
3428 
3429 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3430 	}
3431 
3432 	radeon_ring_write(ring, header);
3433 	radeon_ring_write(ring,
3434 #ifdef __BIG_ENDIAN
3435 			  (2 << 0) |
3436 #endif
3437 			  (ib->gpu_addr & 0xFFFFFFFC));
3438 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3439 	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3440 
3441 	if (!ib->is_const_ib) {
3442 		/* flush read cache over gart for this vmid */
3443 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3444 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3445 		radeon_ring_write(ring, vm_id);
3446 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3447 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3448 				  PACKET3_TC_ACTION_ENA |
3449 				  PACKET3_SH_KCACHE_ACTION_ENA |
3450 				  PACKET3_SH_ICACHE_ACTION_ENA);
3451 		radeon_ring_write(ring, 0xFFFFFFFF);
3452 		radeon_ring_write(ring, 0);
3453 		radeon_ring_write(ring, 10); /* poll interval */
3454 	}
3455 }
3456 
3457 /*
3458  * CP.
3459  */
3460 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3461 {
3462 	if (enable)
3463 		WREG32(CP_ME_CNTL, 0);
3464 	else {
3465 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3466 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3467 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3468 		WREG32(SCRATCH_UMSK, 0);
3469 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3470 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3471 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3472 	}
3473 	udelay(50);
3474 }
3475 
3476 static int si_cp_load_microcode(struct radeon_device *rdev)
3477 {
3478 	int i;
3479 
3480 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3481 		return -EINVAL;
3482 
3483 	si_cp_enable(rdev, false);
3484 
3485 	if (rdev->new_fw) {
3486 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3487 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3488 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3489 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3490 		const struct gfx_firmware_header_v1_0 *me_hdr =
3491 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3492 		const __le32 *fw_data;
3493 		u32 fw_size;
3494 
3495 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3496 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3497 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3498 
3499 		/* PFP */
3500 		fw_data = (const __le32 *)
3501 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3502 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3503 		WREG32(CP_PFP_UCODE_ADDR, 0);
3504 		for (i = 0; i < fw_size; i++)
3505 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3506 		WREG32(CP_PFP_UCODE_ADDR, 0);
3507 
3508 		/* CE */
3509 		fw_data = (const __le32 *)
3510 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3511 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3512 		WREG32(CP_CE_UCODE_ADDR, 0);
3513 		for (i = 0; i < fw_size; i++)
3514 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3515 		WREG32(CP_CE_UCODE_ADDR, 0);
3516 
3517 		/* ME */
3518 		fw_data = (const __be32 *)
3519 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3520 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3521 		WREG32(CP_ME_RAM_WADDR, 0);
3522 		for (i = 0; i < fw_size; i++)
3523 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3524 		WREG32(CP_ME_RAM_WADDR, 0);
3525 	} else {
3526 		const __be32 *fw_data;
3527 
3528 		/* PFP */
3529 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3530 		WREG32(CP_PFP_UCODE_ADDR, 0);
3531 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3532 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3533 		WREG32(CP_PFP_UCODE_ADDR, 0);
3534 
3535 		/* CE */
3536 		fw_data = (const __be32 *)rdev->ce_fw->data;
3537 		WREG32(CP_CE_UCODE_ADDR, 0);
3538 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3539 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3540 		WREG32(CP_CE_UCODE_ADDR, 0);
3541 
3542 		/* ME */
3543 		fw_data = (const __be32 *)rdev->me_fw->data;
3544 		WREG32(CP_ME_RAM_WADDR, 0);
3545 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3546 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3547 		WREG32(CP_ME_RAM_WADDR, 0);
3548 	}
3549 
3550 	WREG32(CP_PFP_UCODE_ADDR, 0);
3551 	WREG32(CP_CE_UCODE_ADDR, 0);
3552 	WREG32(CP_ME_RAM_WADDR, 0);
3553 	WREG32(CP_ME_RAM_RADDR, 0);
3554 	return 0;
3555 }
3556 
3557 static int si_cp_start(struct radeon_device *rdev)
3558 {
3559 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3560 	int r, i;
3561 
3562 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3563 	if (r) {
3564 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3565 		return r;
3566 	}
3567 	/* init the CP */
3568 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3569 	radeon_ring_write(ring, 0x1);
3570 	radeon_ring_write(ring, 0x0);
3571 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3572 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3573 	radeon_ring_write(ring, 0);
3574 	radeon_ring_write(ring, 0);
3575 
3576 	/* init the CE partitions */
3577 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3578 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3579 	radeon_ring_write(ring, 0xc000);
3580 	radeon_ring_write(ring, 0xe000);
3581 	radeon_ring_unlock_commit(rdev, ring, false);
3582 
3583 	si_cp_enable(rdev, true);
3584 
3585 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3586 	if (r) {
3587 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3588 		return r;
3589 	}
3590 
3591 	/* setup clear context state */
3592 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3593 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3594 
3595 	for (i = 0; i < si_default_size; i++)
3596 		radeon_ring_write(ring, si_default_state[i]);
3597 
3598 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3599 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3600 
3601 	/* set clear context state */
3602 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3603 	radeon_ring_write(ring, 0);
3604 
3605 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3606 	radeon_ring_write(ring, 0x00000316);
3607 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3608 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3609 
3610 	radeon_ring_unlock_commit(rdev, ring, false);
3611 
3612 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3613 		ring = &rdev->ring[i];
3614 		r = radeon_ring_lock(rdev, ring, 2);
3615 
3616 		/* clear the compute context state */
3617 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3618 		radeon_ring_write(ring, 0);
3619 
3620 		radeon_ring_unlock_commit(rdev, ring, false);
3621 	}
3622 
3623 	return 0;
3624 }
3625 
3626 static void si_cp_fini(struct radeon_device *rdev)
3627 {
3628 	struct radeon_ring *ring;
3629 	si_cp_enable(rdev, false);
3630 
3631 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3632 	radeon_ring_fini(rdev, ring);
3633 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3634 
3635 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3636 	radeon_ring_fini(rdev, ring);
3637 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3638 
3639 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3640 	radeon_ring_fini(rdev, ring);
3641 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3642 }
3643 
3644 static int si_cp_resume(struct radeon_device *rdev)
3645 {
3646 	struct radeon_ring *ring;
3647 	u32 tmp;
3648 	u32 rb_bufsz;
3649 	int r;
3650 
3651 	si_enable_gui_idle_interrupt(rdev, false);
3652 
3653 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3654 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3655 
3656 	/* Set the write pointer delay */
3657 	WREG32(CP_RB_WPTR_DELAY, 0);
3658 
3659 	WREG32(CP_DEBUG, 0);
3660 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3661 
3662 	/* ring 0 - compute and gfx */
3663 	/* Set ring buffer size */
3664 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3665 	rb_bufsz = order_base_2(ring->ring_size / 8);
3666 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3667 #ifdef __BIG_ENDIAN
3668 	tmp |= BUF_SWAP_32BIT;
3669 #endif
3670 	WREG32(CP_RB0_CNTL, tmp);
3671 
3672 	/* Initialize the ring buffer's read and write pointers */
3673 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3674 	ring->wptr = 0;
3675 	WREG32(CP_RB0_WPTR, ring->wptr);
3676 
3677 	/* set the wb address whether it's enabled or not */
3678 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3679 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3680 
3681 	if (rdev->wb.enabled)
3682 		WREG32(SCRATCH_UMSK, 0xff);
3683 	else {
3684 		tmp |= RB_NO_UPDATE;
3685 		WREG32(SCRATCH_UMSK, 0);
3686 	}
3687 
3688 	mdelay(1);
3689 	WREG32(CP_RB0_CNTL, tmp);
3690 
3691 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3692 
3693 	/* ring1  - compute only */
3694 	/* Set ring buffer size */
3695 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3696 	rb_bufsz = order_base_2(ring->ring_size / 8);
3697 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3698 #ifdef __BIG_ENDIAN
3699 	tmp |= BUF_SWAP_32BIT;
3700 #endif
3701 	WREG32(CP_RB1_CNTL, tmp);
3702 
3703 	/* Initialize the ring buffer's read and write pointers */
3704 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3705 	ring->wptr = 0;
3706 	WREG32(CP_RB1_WPTR, ring->wptr);
3707 
3708 	/* set the wb address whether it's enabled or not */
3709 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3710 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3711 
3712 	mdelay(1);
3713 	WREG32(CP_RB1_CNTL, tmp);
3714 
3715 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3716 
3717 	/* ring2 - compute only */
3718 	/* Set ring buffer size */
3719 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3720 	rb_bufsz = order_base_2(ring->ring_size / 8);
3721 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3722 #ifdef __BIG_ENDIAN
3723 	tmp |= BUF_SWAP_32BIT;
3724 #endif
3725 	WREG32(CP_RB2_CNTL, tmp);
3726 
3727 	/* Initialize the ring buffer's read and write pointers */
3728 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3729 	ring->wptr = 0;
3730 	WREG32(CP_RB2_WPTR, ring->wptr);
3731 
3732 	/* set the wb address whether it's enabled or not */
3733 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3734 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3735 
3736 	mdelay(1);
3737 	WREG32(CP_RB2_CNTL, tmp);
3738 
3739 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3740 
3741 	/* start the rings */
3742 	si_cp_start(rdev);
3743 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3744 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3745 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3746 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3747 	if (r) {
3748 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3749 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3750 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3751 		return r;
3752 	}
3753 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3754 	if (r) {
3755 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3756 	}
3757 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3758 	if (r) {
3759 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3760 	}
3761 
3762 	si_enable_gui_idle_interrupt(rdev, true);
3763 
3764 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3765 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3766 
3767 	return 0;
3768 }
3769 
3770 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3771 {
3772 	u32 reset_mask = 0;
3773 	u32 tmp;
3774 
3775 	/* GRBM_STATUS */
3776 	tmp = RREG32(GRBM_STATUS);
3777 	if (tmp & (PA_BUSY | SC_BUSY |
3778 		   BCI_BUSY | SX_BUSY |
3779 		   TA_BUSY | VGT_BUSY |
3780 		   DB_BUSY | CB_BUSY |
3781 		   GDS_BUSY | SPI_BUSY |
3782 		   IA_BUSY | IA_BUSY_NO_DMA))
3783 		reset_mask |= RADEON_RESET_GFX;
3784 
3785 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3786 		   CP_BUSY | CP_COHERENCY_BUSY))
3787 		reset_mask |= RADEON_RESET_CP;
3788 
3789 	if (tmp & GRBM_EE_BUSY)
3790 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3791 
3792 	/* GRBM_STATUS2 */
3793 	tmp = RREG32(GRBM_STATUS2);
3794 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3795 		reset_mask |= RADEON_RESET_RLC;
3796 
3797 	/* DMA_STATUS_REG 0 */
3798 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3799 	if (!(tmp & DMA_IDLE))
3800 		reset_mask |= RADEON_RESET_DMA;
3801 
3802 	/* DMA_STATUS_REG 1 */
3803 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3804 	if (!(tmp & DMA_IDLE))
3805 		reset_mask |= RADEON_RESET_DMA1;
3806 
3807 	/* SRBM_STATUS2 */
3808 	tmp = RREG32(SRBM_STATUS2);
3809 	if (tmp & DMA_BUSY)
3810 		reset_mask |= RADEON_RESET_DMA;
3811 
3812 	if (tmp & DMA1_BUSY)
3813 		reset_mask |= RADEON_RESET_DMA1;
3814 
3815 	/* SRBM_STATUS */
3816 	tmp = RREG32(SRBM_STATUS);
3817 
3818 	if (tmp & IH_BUSY)
3819 		reset_mask |= RADEON_RESET_IH;
3820 
3821 	if (tmp & SEM_BUSY)
3822 		reset_mask |= RADEON_RESET_SEM;
3823 
3824 	if (tmp & GRBM_RQ_PENDING)
3825 		reset_mask |= RADEON_RESET_GRBM;
3826 
3827 	if (tmp & VMC_BUSY)
3828 		reset_mask |= RADEON_RESET_VMC;
3829 
3830 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3831 		   MCC_BUSY | MCD_BUSY))
3832 		reset_mask |= RADEON_RESET_MC;
3833 
3834 	if (evergreen_is_display_hung(rdev))
3835 		reset_mask |= RADEON_RESET_DISPLAY;
3836 
3837 	/* VM_L2_STATUS */
3838 	tmp = RREG32(VM_L2_STATUS);
3839 	if (tmp & L2_BUSY)
3840 		reset_mask |= RADEON_RESET_VMC;
3841 
3842 	/* Skip MC reset as it's mostly likely not hung, just busy */
3843 	if (reset_mask & RADEON_RESET_MC) {
3844 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3845 		reset_mask &= ~RADEON_RESET_MC;
3846 	}
3847 
3848 	return reset_mask;
3849 }
3850 
3851 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3852 {
3853 	struct evergreen_mc_save save;
3854 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3855 	u32 tmp;
3856 
3857 	if (reset_mask == 0)
3858 		return;
3859 
3860 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3861 
3862 	evergreen_print_gpu_status_regs(rdev);
3863 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3864 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3865 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3866 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3867 
3868 	/* disable PG/CG */
3869 	si_fini_pg(rdev);
3870 	si_fini_cg(rdev);
3871 
3872 	/* stop the rlc */
3873 	si_rlc_stop(rdev);
3874 
3875 	/* Disable CP parsing/prefetching */
3876 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3877 
3878 	if (reset_mask & RADEON_RESET_DMA) {
3879 		/* dma0 */
3880 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3881 		tmp &= ~DMA_RB_ENABLE;
3882 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3883 	}
3884 	if (reset_mask & RADEON_RESET_DMA1) {
3885 		/* dma1 */
3886 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3887 		tmp &= ~DMA_RB_ENABLE;
3888 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3889 	}
3890 
3891 	udelay(50);
3892 
3893 	evergreen_mc_stop(rdev, &save);
3894 	if (evergreen_mc_wait_for_idle(rdev)) {
3895 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3896 	}
3897 
3898 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3899 		grbm_soft_reset = SOFT_RESET_CB |
3900 			SOFT_RESET_DB |
3901 			SOFT_RESET_GDS |
3902 			SOFT_RESET_PA |
3903 			SOFT_RESET_SC |
3904 			SOFT_RESET_BCI |
3905 			SOFT_RESET_SPI |
3906 			SOFT_RESET_SX |
3907 			SOFT_RESET_TC |
3908 			SOFT_RESET_TA |
3909 			SOFT_RESET_VGT |
3910 			SOFT_RESET_IA;
3911 	}
3912 
3913 	if (reset_mask & RADEON_RESET_CP) {
3914 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3915 
3916 		srbm_soft_reset |= SOFT_RESET_GRBM;
3917 	}
3918 
3919 	if (reset_mask & RADEON_RESET_DMA)
3920 		srbm_soft_reset |= SOFT_RESET_DMA;
3921 
3922 	if (reset_mask & RADEON_RESET_DMA1)
3923 		srbm_soft_reset |= SOFT_RESET_DMA1;
3924 
3925 	if (reset_mask & RADEON_RESET_DISPLAY)
3926 		srbm_soft_reset |= SOFT_RESET_DC;
3927 
3928 	if (reset_mask & RADEON_RESET_RLC)
3929 		grbm_soft_reset |= SOFT_RESET_RLC;
3930 
3931 	if (reset_mask & RADEON_RESET_SEM)
3932 		srbm_soft_reset |= SOFT_RESET_SEM;
3933 
3934 	if (reset_mask & RADEON_RESET_IH)
3935 		srbm_soft_reset |= SOFT_RESET_IH;
3936 
3937 	if (reset_mask & RADEON_RESET_GRBM)
3938 		srbm_soft_reset |= SOFT_RESET_GRBM;
3939 
3940 	if (reset_mask & RADEON_RESET_VMC)
3941 		srbm_soft_reset |= SOFT_RESET_VMC;
3942 
3943 	if (reset_mask & RADEON_RESET_MC)
3944 		srbm_soft_reset |= SOFT_RESET_MC;
3945 
3946 	if (grbm_soft_reset) {
3947 		tmp = RREG32(GRBM_SOFT_RESET);
3948 		tmp |= grbm_soft_reset;
3949 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3950 		WREG32(GRBM_SOFT_RESET, tmp);
3951 		tmp = RREG32(GRBM_SOFT_RESET);
3952 
3953 		udelay(50);
3954 
3955 		tmp &= ~grbm_soft_reset;
3956 		WREG32(GRBM_SOFT_RESET, tmp);
3957 		tmp = RREG32(GRBM_SOFT_RESET);
3958 	}
3959 
3960 	if (srbm_soft_reset) {
3961 		tmp = RREG32(SRBM_SOFT_RESET);
3962 		tmp |= srbm_soft_reset;
3963 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3964 		WREG32(SRBM_SOFT_RESET, tmp);
3965 		tmp = RREG32(SRBM_SOFT_RESET);
3966 
3967 		udelay(50);
3968 
3969 		tmp &= ~srbm_soft_reset;
3970 		WREG32(SRBM_SOFT_RESET, tmp);
3971 		tmp = RREG32(SRBM_SOFT_RESET);
3972 	}
3973 
3974 	/* Wait a little for things to settle down */
3975 	udelay(50);
3976 
3977 	evergreen_mc_resume(rdev, &save);
3978 	udelay(50);
3979 
3980 	evergreen_print_gpu_status_regs(rdev);
3981 }
3982 
3983 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3984 {
3985 	u32 tmp, i;
3986 
3987 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3988 	tmp |= SPLL_BYPASS_EN;
3989 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3990 
3991 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3992 	tmp |= SPLL_CTLREQ_CHG;
3993 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3994 
3995 	for (i = 0; i < rdev->usec_timeout; i++) {
3996 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3997 			break;
3998 		udelay(1);
3999 	}
4000 
4001 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
4002 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
4003 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
4004 
4005 	tmp = RREG32(MPLL_CNTL_MODE);
4006 	tmp &= ~MPLL_MCLK_SEL;
4007 	WREG32(MPLL_CNTL_MODE, tmp);
4008 }
4009 
4010 static void si_spll_powerdown(struct radeon_device *rdev)
4011 {
4012 	u32 tmp;
4013 
4014 	tmp = RREG32(SPLL_CNTL_MODE);
4015 	tmp |= SPLL_SW_DIR_CONTROL;
4016 	WREG32(SPLL_CNTL_MODE, tmp);
4017 
4018 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4019 	tmp |= SPLL_RESET;
4020 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4021 
4022 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4023 	tmp |= SPLL_SLEEP;
4024 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4025 
4026 	tmp = RREG32(SPLL_CNTL_MODE);
4027 	tmp &= ~SPLL_SW_DIR_CONTROL;
4028 	WREG32(SPLL_CNTL_MODE, tmp);
4029 }
4030 
4031 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4032 {
4033 	struct evergreen_mc_save save;
4034 	u32 tmp, i;
4035 
4036 	dev_info(rdev->dev, "GPU pci config reset\n");
4037 
4038 	/* disable dpm? */
4039 
4040 	/* disable cg/pg */
4041 	si_fini_pg(rdev);
4042 	si_fini_cg(rdev);
4043 
4044 	/* Disable CP parsing/prefetching */
4045 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4046 	/* dma0 */
4047 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4048 	tmp &= ~DMA_RB_ENABLE;
4049 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4050 	/* dma1 */
4051 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4052 	tmp &= ~DMA_RB_ENABLE;
4053 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4054 	/* XXX other engines? */
4055 
4056 	/* halt the rlc, disable cp internal ints */
4057 	si_rlc_stop(rdev);
4058 
4059 	udelay(50);
4060 
4061 	/* disable mem access */
4062 	evergreen_mc_stop(rdev, &save);
4063 	if (evergreen_mc_wait_for_idle(rdev)) {
4064 		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4065 	}
4066 
4067 	/* set mclk/sclk to bypass */
4068 	si_set_clk_bypass_mode(rdev);
4069 	/* powerdown spll */
4070 	si_spll_powerdown(rdev);
4071 	/* disable BM */
4072 	pci_clear_master(rdev->pdev);
4073 	/* reset */
4074 	radeon_pci_config_reset(rdev);
4075 	/* wait for asic to come out of reset */
4076 	for (i = 0; i < rdev->usec_timeout; i++) {
4077 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4078 			break;
4079 		udelay(1);
4080 	}
4081 }
4082 
4083 int si_asic_reset(struct radeon_device *rdev, bool hard)
4084 {
4085 	u32 reset_mask;
4086 
4087 	if (hard) {
4088 		si_gpu_pci_config_reset(rdev);
4089 		return 0;
4090 	}
4091 
4092 	reset_mask = si_gpu_check_soft_reset(rdev);
4093 
4094 	if (reset_mask)
4095 		r600_set_bios_scratch_engine_hung(rdev, true);
4096 
4097 	/* try soft reset */
4098 	si_gpu_soft_reset(rdev, reset_mask);
4099 
4100 	reset_mask = si_gpu_check_soft_reset(rdev);
4101 
4102 	/* try pci config reset */
4103 	if (reset_mask && radeon_hard_reset)
4104 		si_gpu_pci_config_reset(rdev);
4105 
4106 	reset_mask = si_gpu_check_soft_reset(rdev);
4107 
4108 	if (!reset_mask)
4109 		r600_set_bios_scratch_engine_hung(rdev, false);
4110 
4111 	return 0;
4112 }
4113 
4114 /**
4115  * si_gfx_is_lockup - Check if the GFX engine is locked up
4116  *
4117  * @rdev: radeon_device pointer
4118  * @ring: radeon_ring structure holding ring information
4119  *
4120  * Check if the GFX engine is locked up.
4121  * Returns true if the engine appears to be locked up, false if not.
4122  */
4123 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4124 {
4125 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4126 
4127 	if (!(reset_mask & (RADEON_RESET_GFX |
4128 			    RADEON_RESET_COMPUTE |
4129 			    RADEON_RESET_CP))) {
4130 		radeon_ring_lockup_update(rdev, ring);
4131 		return false;
4132 	}
4133 	return radeon_ring_test_lockup(rdev, ring);
4134 }
4135 
4136 /* MC */
4137 static void si_mc_program(struct radeon_device *rdev)
4138 {
4139 	struct evergreen_mc_save save;
4140 	u32 tmp;
4141 	int i, j;
4142 
4143 	/* Initialize HDP */
4144 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4145 		WREG32((0x2c14 + j), 0x00000000);
4146 		WREG32((0x2c18 + j), 0x00000000);
4147 		WREG32((0x2c1c + j), 0x00000000);
4148 		WREG32((0x2c20 + j), 0x00000000);
4149 		WREG32((0x2c24 + j), 0x00000000);
4150 	}
4151 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4152 
4153 	evergreen_mc_stop(rdev, &save);
4154 	if (radeon_mc_wait_for_idle(rdev)) {
4155 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4156 	}
4157 	if (!ASIC_IS_NODCE(rdev))
4158 		/* Lockout access through VGA aperture*/
4159 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4160 	/* Update configuration */
4161 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4162 	       rdev->mc.vram_start >> 12);
4163 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4164 	       rdev->mc.vram_end >> 12);
4165 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4166 	       rdev->vram_scratch.gpu_addr >> 12);
4167 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4168 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4169 	WREG32(MC_VM_FB_LOCATION, tmp);
4170 	/* XXX double check these! */
4171 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4172 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4173 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4174 	WREG32(MC_VM_AGP_BASE, 0);
4175 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4176 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4177 	if (radeon_mc_wait_for_idle(rdev)) {
4178 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4179 	}
4180 	evergreen_mc_resume(rdev, &save);
4181 	if (!ASIC_IS_NODCE(rdev)) {
4182 		/* we need to own VRAM, so turn off the VGA renderer here
4183 		 * to stop it overwriting our objects */
4184 		rv515_vga_render_disable(rdev);
4185 	}
4186 }
4187 
4188 void si_vram_gtt_location(struct radeon_device *rdev,
4189 			  struct radeon_mc *mc)
4190 {
4191 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4192 		/* leave room for at least 1024M GTT */
4193 		dev_warn(rdev->dev, "limiting VRAM\n");
4194 		mc->real_vram_size = 0xFFC0000000ULL;
4195 		mc->mc_vram_size = 0xFFC0000000ULL;
4196 	}
4197 	radeon_vram_location(rdev, &rdev->mc, 0);
4198 	rdev->mc.gtt_base_align = 0;
4199 	radeon_gtt_location(rdev, mc);
4200 }
4201 
4202 static int si_mc_init(struct radeon_device *rdev)
4203 {
4204 	u32 tmp;
4205 	int chansize, numchan;
4206 
4207 	/* Get VRAM informations */
4208 	rdev->mc.vram_is_ddr = true;
4209 	tmp = RREG32(MC_ARB_RAMCFG);
4210 	if (tmp & CHANSIZE_OVERRIDE) {
4211 		chansize = 16;
4212 	} else if (tmp & CHANSIZE_MASK) {
4213 		chansize = 64;
4214 	} else {
4215 		chansize = 32;
4216 	}
4217 	tmp = RREG32(MC_SHARED_CHMAP);
4218 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4219 	case 0:
4220 	default:
4221 		numchan = 1;
4222 		break;
4223 	case 1:
4224 		numchan = 2;
4225 		break;
4226 	case 2:
4227 		numchan = 4;
4228 		break;
4229 	case 3:
4230 		numchan = 8;
4231 		break;
4232 	case 4:
4233 		numchan = 3;
4234 		break;
4235 	case 5:
4236 		numchan = 6;
4237 		break;
4238 	case 6:
4239 		numchan = 10;
4240 		break;
4241 	case 7:
4242 		numchan = 12;
4243 		break;
4244 	case 8:
4245 		numchan = 16;
4246 		break;
4247 	}
4248 	rdev->mc.vram_width = numchan * chansize;
4249 	/* Could aper size report 0 ? */
4250 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4251 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4252 	/* size in MB on si */
4253 	tmp = RREG32(CONFIG_MEMSIZE);
4254 	/* some boards may have garbage in the upper 16 bits */
4255 	if (tmp & 0xffff0000) {
4256 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4257 		if (tmp & 0xffff)
4258 			tmp &= 0xffff;
4259 	}
4260 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4261 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4262 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4263 	si_vram_gtt_location(rdev, &rdev->mc);
4264 	radeon_update_bandwidth_info(rdev);
4265 
4266 	return 0;
4267 }
4268 
4269 /*
4270  * GART
4271  */
4272 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4273 {
4274 	/* flush hdp cache */
4275 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4276 
4277 	/* bits 0-15 are the VM contexts0-15 */
4278 	WREG32(VM_INVALIDATE_REQUEST, 1);
4279 }
4280 
4281 static int si_pcie_gart_enable(struct radeon_device *rdev)
4282 {
4283 	int r, i;
4284 
4285 	if (rdev->gart.robj == NULL) {
4286 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4287 		return -EINVAL;
4288 	}
4289 	r = radeon_gart_table_vram_pin(rdev);
4290 	if (r)
4291 		return r;
4292 	/* Setup TLB control */
4293 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4294 	       (0xA << 7) |
4295 	       ENABLE_L1_TLB |
4296 	       ENABLE_L1_FRAGMENT_PROCESSING |
4297 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4298 	       ENABLE_ADVANCED_DRIVER_MODEL |
4299 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4300 	/* Setup L2 cache */
4301 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4302 	       ENABLE_L2_FRAGMENT_PROCESSING |
4303 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4304 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4305 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4306 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4307 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4308 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4309 	       BANK_SELECT(4) |
4310 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4311 	/* setup context0 */
4312 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4313 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4314 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4315 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4316 			(u32)(rdev->dummy_page.addr >> 12));
4317 	WREG32(VM_CONTEXT0_CNTL2, 0);
4318 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4319 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4320 
4321 	WREG32(0x15D4, 0);
4322 	WREG32(0x15D8, 0);
4323 	WREG32(0x15DC, 0);
4324 
4325 	/* empty context1-15 */
4326 	/* set vm size, must be a multiple of 4 */
4327 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4328 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4329 	/* Assign the pt base to something valid for now; the pts used for
4330 	 * the VMs are determined by the application and setup and assigned
4331 	 * on the fly in the vm part of radeon_gart.c
4332 	 */
4333 	for (i = 1; i < 16; i++) {
4334 		if (i < 8)
4335 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4336 			       rdev->vm_manager.saved_table_addr[i]);
4337 		else
4338 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4339 			       rdev->vm_manager.saved_table_addr[i]);
4340 	}
4341 
4342 	/* enable context1-15 */
4343 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4344 	       (u32)(rdev->dummy_page.addr >> 12));
4345 	WREG32(VM_CONTEXT1_CNTL2, 4);
4346 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4347 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4348 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4349 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4350 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4351 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4352 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4353 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4354 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4355 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4356 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4357 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4358 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4359 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4360 
4361 	si_pcie_gart_tlb_flush(rdev);
4362 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4363 		 (unsigned)(rdev->mc.gtt_size >> 20),
4364 		 (unsigned long long)rdev->gart.table_addr);
4365 	rdev->gart.ready = true;
4366 	return 0;
4367 }
4368 
4369 static void si_pcie_gart_disable(struct radeon_device *rdev)
4370 {
4371 	unsigned i;
4372 
4373 	for (i = 1; i < 16; ++i) {
4374 		uint32_t reg;
4375 		if (i < 8)
4376 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4377 		else
4378 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4379 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4380 	}
4381 
4382 	/* Disable all tables */
4383 	WREG32(VM_CONTEXT0_CNTL, 0);
4384 	WREG32(VM_CONTEXT1_CNTL, 0);
4385 	/* Setup TLB control */
4386 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4387 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4388 	/* Setup L2 cache */
4389 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4390 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4391 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4392 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4393 	WREG32(VM_L2_CNTL2, 0);
4394 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4395 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4396 	radeon_gart_table_vram_unpin(rdev);
4397 }
4398 
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART, frees the GART table held in VRAM and finalizes the
 * common GART state, in that order.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	/* the hardware must stop using the table before it is freed */
	si_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4405 
4406 /* vm parser */
4407 static bool si_vm_reg_valid(u32 reg)
4408 {
4409 	/* context regs are fine */
4410 	if (reg >= 0x28000)
4411 		return true;
4412 
4413 	/* shader regs are also fine */
4414 	if (reg >= 0xB000 && reg < 0xC000)
4415 		return true;
4416 
4417 	/* check config regs */
4418 	switch (reg) {
4419 	case GRBM_GFX_INDEX:
4420 	case CP_STRMOUT_CNTL:
4421 	case VGT_VTX_VECT_EJECT_REG:
4422 	case VGT_CACHE_INVALIDATION:
4423 	case VGT_ESGS_RING_SIZE:
4424 	case VGT_GSVS_RING_SIZE:
4425 	case VGT_GS_VERTEX_REUSE:
4426 	case VGT_PRIMITIVE_TYPE:
4427 	case VGT_INDEX_TYPE:
4428 	case VGT_NUM_INDICES:
4429 	case VGT_NUM_INSTANCES:
4430 	case VGT_TF_RING_SIZE:
4431 	case VGT_HS_OFFCHIP_PARAM:
4432 	case VGT_TF_MEMORY_BASE:
4433 	case PA_CL_ENHANCE:
4434 	case PA_SU_LINE_STIPPLE_VALUE:
4435 	case PA_SC_LINE_STIPPLE_STATE:
4436 	case PA_SC_ENHANCE:
4437 	case SQC_CACHES:
4438 	case SPI_STATIC_THREAD_MGMT_1:
4439 	case SPI_STATIC_THREAD_MGMT_2:
4440 	case SPI_STATIC_THREAD_MGMT_3:
4441 	case SPI_PS_MAX_WAVE_ID:
4442 	case SPI_CONFIG_CNTL:
4443 	case SPI_CONFIG_CNTL_1:
4444 	case TA_CNTL_AUX:
4445 	case TA_CS_BC_BASE_ADDR:
4446 		return true;
4447 	default:
4448 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4449 		return false;
4450 	}
4451 }
4452 
4453 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4454 				  u32 *ib, struct radeon_cs_packet *pkt)
4455 {
4456 	switch (pkt->opcode) {
4457 	case PACKET3_NOP:
4458 	case PACKET3_SET_BASE:
4459 	case PACKET3_SET_CE_DE_COUNTERS:
4460 	case PACKET3_LOAD_CONST_RAM:
4461 	case PACKET3_WRITE_CONST_RAM:
4462 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4463 	case PACKET3_DUMP_CONST_RAM:
4464 	case PACKET3_INCREMENT_CE_COUNTER:
4465 	case PACKET3_WAIT_ON_DE_COUNTER:
4466 	case PACKET3_CE_WRITE:
4467 		break;
4468 	default:
4469 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4470 		return -EINVAL;
4471 	}
4472 	return 0;
4473 }
4474 
4475 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4476 {
4477 	u32 start_reg, reg, i;
4478 	u32 command = ib[idx + 4];
4479 	u32 info = ib[idx + 1];
4480 	u32 idx_value = ib[idx];
4481 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4482 		/* src address space is register */
4483 		if (((info & 0x60000000) >> 29) == 0) {
4484 			start_reg = idx_value << 2;
4485 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4486 				reg = start_reg;
4487 				if (!si_vm_reg_valid(reg)) {
4488 					DRM_ERROR("CP DMA Bad SRC register\n");
4489 					return -EINVAL;
4490 				}
4491 			} else {
4492 				for (i = 0; i < (command & 0x1fffff); i++) {
4493 					reg = start_reg + (4 * i);
4494 					if (!si_vm_reg_valid(reg)) {
4495 						DRM_ERROR("CP DMA Bad SRC register\n");
4496 						return -EINVAL;
4497 					}
4498 				}
4499 			}
4500 		}
4501 	}
4502 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4503 		/* dst address space is register */
4504 		if (((info & 0x00300000) >> 20) == 0) {
4505 			start_reg = ib[idx + 2];
4506 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4507 				reg = start_reg;
4508 				if (!si_vm_reg_valid(reg)) {
4509 					DRM_ERROR("CP DMA Bad DST register\n");
4510 					return -EINVAL;
4511 				}
4512 			} else {
4513 				for (i = 0; i < (command & 0x1fffff); i++) {
4514 					reg = start_reg + (4 * i);
4515 				if (!si_vm_reg_valid(reg)) {
4516 						DRM_ERROR("CP DMA Bad DST register\n");
4517 						return -EINVAL;
4518 					}
4519 				}
4520 			}
4521 		}
4522 	}
4523 	return 0;
4524 }
4525 
4526 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4527 				   u32 *ib, struct radeon_cs_packet *pkt)
4528 {
4529 	int r;
4530 	u32 idx = pkt->idx + 1;
4531 	u32 idx_value = ib[idx];
4532 	u32 start_reg, end_reg, reg, i;
4533 
4534 	switch (pkt->opcode) {
4535 	case PACKET3_NOP:
4536 	case PACKET3_SET_BASE:
4537 	case PACKET3_CLEAR_STATE:
4538 	case PACKET3_INDEX_BUFFER_SIZE:
4539 	case PACKET3_DISPATCH_DIRECT:
4540 	case PACKET3_DISPATCH_INDIRECT:
4541 	case PACKET3_ALLOC_GDS:
4542 	case PACKET3_WRITE_GDS_RAM:
4543 	case PACKET3_ATOMIC_GDS:
4544 	case PACKET3_ATOMIC:
4545 	case PACKET3_OCCLUSION_QUERY:
4546 	case PACKET3_SET_PREDICATION:
4547 	case PACKET3_COND_EXEC:
4548 	case PACKET3_PRED_EXEC:
4549 	case PACKET3_DRAW_INDIRECT:
4550 	case PACKET3_DRAW_INDEX_INDIRECT:
4551 	case PACKET3_INDEX_BASE:
4552 	case PACKET3_DRAW_INDEX_2:
4553 	case PACKET3_CONTEXT_CONTROL:
4554 	case PACKET3_INDEX_TYPE:
4555 	case PACKET3_DRAW_INDIRECT_MULTI:
4556 	case PACKET3_DRAW_INDEX_AUTO:
4557 	case PACKET3_DRAW_INDEX_IMMD:
4558 	case PACKET3_NUM_INSTANCES:
4559 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4560 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4561 	case PACKET3_DRAW_INDEX_OFFSET_2:
4562 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4563 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4564 	case PACKET3_MPEG_INDEX:
4565 	case PACKET3_WAIT_REG_MEM:
4566 	case PACKET3_MEM_WRITE:
4567 	case PACKET3_PFP_SYNC_ME:
4568 	case PACKET3_SURFACE_SYNC:
4569 	case PACKET3_EVENT_WRITE:
4570 	case PACKET3_EVENT_WRITE_EOP:
4571 	case PACKET3_EVENT_WRITE_EOS:
4572 	case PACKET3_SET_CONTEXT_REG:
4573 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4574 	case PACKET3_SET_SH_REG:
4575 	case PACKET3_SET_SH_REG_OFFSET:
4576 	case PACKET3_INCREMENT_DE_COUNTER:
4577 	case PACKET3_WAIT_ON_CE_COUNTER:
4578 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4579 	case PACKET3_ME_WRITE:
4580 		break;
4581 	case PACKET3_COPY_DATA:
4582 		if ((idx_value & 0xf00) == 0) {
4583 			reg = ib[idx + 3] * 4;
4584 			if (!si_vm_reg_valid(reg))
4585 				return -EINVAL;
4586 		}
4587 		break;
4588 	case PACKET3_WRITE_DATA:
4589 		if ((idx_value & 0xf00) == 0) {
4590 			start_reg = ib[idx + 1] * 4;
4591 			if (idx_value & 0x10000) {
4592 				if (!si_vm_reg_valid(start_reg))
4593 					return -EINVAL;
4594 			} else {
4595 				for (i = 0; i < (pkt->count - 2); i++) {
4596 					reg = start_reg + (4 * i);
4597 					if (!si_vm_reg_valid(reg))
4598 						return -EINVAL;
4599 				}
4600 			}
4601 		}
4602 		break;
4603 	case PACKET3_COND_WRITE:
4604 		if (idx_value & 0x100) {
4605 			reg = ib[idx + 5] * 4;
4606 			if (!si_vm_reg_valid(reg))
4607 				return -EINVAL;
4608 		}
4609 		break;
4610 	case PACKET3_COPY_DW:
4611 		if (idx_value & 0x2) {
4612 			reg = ib[idx + 3] * 4;
4613 			if (!si_vm_reg_valid(reg))
4614 				return -EINVAL;
4615 		}
4616 		break;
4617 	case PACKET3_SET_CONFIG_REG:
4618 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4619 		end_reg = 4 * pkt->count + start_reg - 4;
4620 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4621 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4622 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4623 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4624 			return -EINVAL;
4625 		}
4626 		for (i = 0; i < pkt->count; i++) {
4627 			reg = start_reg + (4 * i);
4628 			if (!si_vm_reg_valid(reg))
4629 				return -EINVAL;
4630 		}
4631 		break;
4632 	case PACKET3_CP_DMA:
4633 		r = si_vm_packet3_cp_dma_check(ib, idx);
4634 		if (r)
4635 			return r;
4636 		break;
4637 	default:
4638 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4639 		return -EINVAL;
4640 	}
4641 	return 0;
4642 }
4643 
4644 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4645 				       u32 *ib, struct radeon_cs_packet *pkt)
4646 {
4647 	int r;
4648 	u32 idx = pkt->idx + 1;
4649 	u32 idx_value = ib[idx];
4650 	u32 start_reg, reg, i;
4651 
4652 	switch (pkt->opcode) {
4653 	case PACKET3_NOP:
4654 	case PACKET3_SET_BASE:
4655 	case PACKET3_CLEAR_STATE:
4656 	case PACKET3_DISPATCH_DIRECT:
4657 	case PACKET3_DISPATCH_INDIRECT:
4658 	case PACKET3_ALLOC_GDS:
4659 	case PACKET3_WRITE_GDS_RAM:
4660 	case PACKET3_ATOMIC_GDS:
4661 	case PACKET3_ATOMIC:
4662 	case PACKET3_OCCLUSION_QUERY:
4663 	case PACKET3_SET_PREDICATION:
4664 	case PACKET3_COND_EXEC:
4665 	case PACKET3_PRED_EXEC:
4666 	case PACKET3_CONTEXT_CONTROL:
4667 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4668 	case PACKET3_WAIT_REG_MEM:
4669 	case PACKET3_MEM_WRITE:
4670 	case PACKET3_PFP_SYNC_ME:
4671 	case PACKET3_SURFACE_SYNC:
4672 	case PACKET3_EVENT_WRITE:
4673 	case PACKET3_EVENT_WRITE_EOP:
4674 	case PACKET3_EVENT_WRITE_EOS:
4675 	case PACKET3_SET_CONTEXT_REG:
4676 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4677 	case PACKET3_SET_SH_REG:
4678 	case PACKET3_SET_SH_REG_OFFSET:
4679 	case PACKET3_INCREMENT_DE_COUNTER:
4680 	case PACKET3_WAIT_ON_CE_COUNTER:
4681 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4682 	case PACKET3_ME_WRITE:
4683 		break;
4684 	case PACKET3_COPY_DATA:
4685 		if ((idx_value & 0xf00) == 0) {
4686 			reg = ib[idx + 3] * 4;
4687 			if (!si_vm_reg_valid(reg))
4688 				return -EINVAL;
4689 		}
4690 		break;
4691 	case PACKET3_WRITE_DATA:
4692 		if ((idx_value & 0xf00) == 0) {
4693 			start_reg = ib[idx + 1] * 4;
4694 			if (idx_value & 0x10000) {
4695 				if (!si_vm_reg_valid(start_reg))
4696 					return -EINVAL;
4697 			} else {
4698 				for (i = 0; i < (pkt->count - 2); i++) {
4699 					reg = start_reg + (4 * i);
4700 					if (!si_vm_reg_valid(reg))
4701 						return -EINVAL;
4702 				}
4703 			}
4704 		}
4705 		break;
4706 	case PACKET3_COND_WRITE:
4707 		if (idx_value & 0x100) {
4708 			reg = ib[idx + 5] * 4;
4709 			if (!si_vm_reg_valid(reg))
4710 				return -EINVAL;
4711 		}
4712 		break;
4713 	case PACKET3_COPY_DW:
4714 		if (idx_value & 0x2) {
4715 			reg = ib[idx + 3] * 4;
4716 			if (!si_vm_reg_valid(reg))
4717 				return -EINVAL;
4718 		}
4719 		break;
4720 	case PACKET3_CP_DMA:
4721 		r = si_vm_packet3_cp_dma_check(ib, idx);
4722 		if (r)
4723 			return r;
4724 		break;
4725 	default:
4726 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4727 		return -EINVAL;
4728 	}
4729 	return 0;
4730 }
4731 
4732 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4733 {
4734 	int ret = 0;
4735 	u32 idx = 0, i;
4736 	struct radeon_cs_packet pkt;
4737 
4738 	do {
4739 		pkt.idx = idx;
4740 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4741 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4742 		pkt.one_reg_wr = 0;
4743 		switch (pkt.type) {
4744 		case RADEON_PACKET_TYPE0:
4745 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4746 			ret = -EINVAL;
4747 			break;
4748 		case RADEON_PACKET_TYPE2:
4749 			idx += 1;
4750 			break;
4751 		case RADEON_PACKET_TYPE3:
4752 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4753 			if (ib->is_const_ib)
4754 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4755 			else {
4756 				switch (ib->ring) {
4757 				case RADEON_RING_TYPE_GFX_INDEX:
4758 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4759 					break;
4760 				case CAYMAN_RING_TYPE_CP1_INDEX:
4761 				case CAYMAN_RING_TYPE_CP2_INDEX:
4762 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4763 					break;
4764 				default:
4765 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4766 					ret = -EINVAL;
4767 					break;
4768 				}
4769 			}
4770 			idx += pkt.count + 2;
4771 			break;
4772 		default:
4773 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4774 			ret = -EINVAL;
4775 			break;
4776 		}
4777 		if (ret) {
4778 			for (i = 0; i < ib->length_dw; i++) {
4779 				if (i == idx)
4780 					printk("\t0x%08x <---\n", ib->ptr[i]);
4781 				else
4782 					printk("\t0x%08x\n", ib->ptr[i]);
4783 			}
4784 			break;
4785 		}
4786 	} while (idx < ib->length_dw);
4787 
4788 	return ret;
4789 }
4790 
4791 /*
4792  * vm
4793  */
4794 int si_vm_init(struct radeon_device *rdev)
4795 {
4796 	/* number of VMs */
4797 	rdev->vm_manager.nvm = 16;
4798 	/* base offset of vram pages */
4799 	rdev->vm_manager.vram_base_offset = 0;
4800 
4801 	return 0;
4802 }
4803 
/**
 * si_vm_fini - VM manager teardown hook (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Intentionally empty: si_vm_init() only stores plain values, so there
 * is nothing to release.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4807 
4808 /**
4809  * si_vm_decode_fault - print human readable fault info
4810  *
4811  * @rdev: radeon_device pointer
4812  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4813  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4814  *
4815  * Print human readable fault information (SI).
4816  */
4817 static void si_vm_decode_fault(struct radeon_device *rdev,
4818 			       u32 status, u32 addr)
4819 {
4820 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4821 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4822 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4823 	char *block;
4824 
4825 	if (rdev->family == CHIP_TAHITI) {
4826 		switch (mc_id) {
4827 		case 160:
4828 		case 144:
4829 		case 96:
4830 		case 80:
4831 		case 224:
4832 		case 208:
4833 		case 32:
4834 		case 16:
4835 			block = "CB";
4836 			break;
4837 		case 161:
4838 		case 145:
4839 		case 97:
4840 		case 81:
4841 		case 225:
4842 		case 209:
4843 		case 33:
4844 		case 17:
4845 			block = "CB_FMASK";
4846 			break;
4847 		case 162:
4848 		case 146:
4849 		case 98:
4850 		case 82:
4851 		case 226:
4852 		case 210:
4853 		case 34:
4854 		case 18:
4855 			block = "CB_CMASK";
4856 			break;
4857 		case 163:
4858 		case 147:
4859 		case 99:
4860 		case 83:
4861 		case 227:
4862 		case 211:
4863 		case 35:
4864 		case 19:
4865 			block = "CB_IMMED";
4866 			break;
4867 		case 164:
4868 		case 148:
4869 		case 100:
4870 		case 84:
4871 		case 228:
4872 		case 212:
4873 		case 36:
4874 		case 20:
4875 			block = "DB";
4876 			break;
4877 		case 165:
4878 		case 149:
4879 		case 101:
4880 		case 85:
4881 		case 229:
4882 		case 213:
4883 		case 37:
4884 		case 21:
4885 			block = "DB_HTILE";
4886 			break;
4887 		case 167:
4888 		case 151:
4889 		case 103:
4890 		case 87:
4891 		case 231:
4892 		case 215:
4893 		case 39:
4894 		case 23:
4895 			block = "DB_STEN";
4896 			break;
4897 		case 72:
4898 		case 68:
4899 		case 64:
4900 		case 8:
4901 		case 4:
4902 		case 0:
4903 		case 136:
4904 		case 132:
4905 		case 128:
4906 		case 200:
4907 		case 196:
4908 		case 192:
4909 			block = "TC";
4910 			break;
4911 		case 112:
4912 		case 48:
4913 			block = "CP";
4914 			break;
4915 		case 49:
4916 		case 177:
4917 		case 50:
4918 		case 178:
4919 			block = "SH";
4920 			break;
4921 		case 53:
4922 		case 190:
4923 			block = "VGT";
4924 			break;
4925 		case 117:
4926 			block = "IH";
4927 			break;
4928 		case 51:
4929 		case 115:
4930 			block = "RLC";
4931 			break;
4932 		case 119:
4933 		case 183:
4934 			block = "DMA0";
4935 			break;
4936 		case 61:
4937 			block = "DMA1";
4938 			break;
4939 		case 248:
4940 		case 120:
4941 			block = "HDP";
4942 			break;
4943 		default:
4944 			block = "unknown";
4945 			break;
4946 		}
4947 	} else {
4948 		switch (mc_id) {
4949 		case 32:
4950 		case 16:
4951 		case 96:
4952 		case 80:
4953 		case 160:
4954 		case 144:
4955 		case 224:
4956 		case 208:
4957 			block = "CB";
4958 			break;
4959 		case 33:
4960 		case 17:
4961 		case 97:
4962 		case 81:
4963 		case 161:
4964 		case 145:
4965 		case 225:
4966 		case 209:
4967 			block = "CB_FMASK";
4968 			break;
4969 		case 34:
4970 		case 18:
4971 		case 98:
4972 		case 82:
4973 		case 162:
4974 		case 146:
4975 		case 226:
4976 		case 210:
4977 			block = "CB_CMASK";
4978 			break;
4979 		case 35:
4980 		case 19:
4981 		case 99:
4982 		case 83:
4983 		case 163:
4984 		case 147:
4985 		case 227:
4986 		case 211:
4987 			block = "CB_IMMED";
4988 			break;
4989 		case 36:
4990 		case 20:
4991 		case 100:
4992 		case 84:
4993 		case 164:
4994 		case 148:
4995 		case 228:
4996 		case 212:
4997 			block = "DB";
4998 			break;
4999 		case 37:
5000 		case 21:
5001 		case 101:
5002 		case 85:
5003 		case 165:
5004 		case 149:
5005 		case 229:
5006 		case 213:
5007 			block = "DB_HTILE";
5008 			break;
5009 		case 39:
5010 		case 23:
5011 		case 103:
5012 		case 87:
5013 		case 167:
5014 		case 151:
5015 		case 231:
5016 		case 215:
5017 			block = "DB_STEN";
5018 			break;
5019 		case 72:
5020 		case 68:
5021 		case 8:
5022 		case 4:
5023 		case 136:
5024 		case 132:
5025 		case 200:
5026 		case 196:
5027 			block = "TC";
5028 			break;
5029 		case 112:
5030 		case 48:
5031 			block = "CP";
5032 			break;
5033 		case 49:
5034 		case 177:
5035 		case 50:
5036 		case 178:
5037 			block = "SH";
5038 			break;
5039 		case 53:
5040 			block = "VGT";
5041 			break;
5042 		case 117:
5043 			block = "IH";
5044 			break;
5045 		case 51:
5046 		case 115:
5047 			block = "RLC";
5048 			break;
5049 		case 119:
5050 		case 183:
5051 			block = "DMA0";
5052 			break;
5053 		case 61:
5054 			block = "DMA1";
5055 			break;
5056 		case 248:
5057 		case 120:
5058 			block = "HDP";
5059 			break;
5060 		default:
5061 			block = "unknown";
5062 			break;
5063 		}
5064 	}
5065 
5066 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5067 	       protections, vmid, addr,
5068 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5069 	       block, mc_id);
5070 }
5071 
5072 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5073 		 unsigned vm_id, uint64_t pd_addr)
5074 {
5075 	/* write new base address */
5076 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5077 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5078 				 WRITE_DATA_DST_SEL(0)));
5079 
5080 	if (vm_id < 8) {
5081 		radeon_ring_write(ring,
5082 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5083 	} else {
5084 		radeon_ring_write(ring,
5085 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5086 	}
5087 	radeon_ring_write(ring, 0);
5088 	radeon_ring_write(ring, pd_addr >> 12);
5089 
5090 	/* flush hdp cache */
5091 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5092 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5093 				 WRITE_DATA_DST_SEL(0)));
5094 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5095 	radeon_ring_write(ring, 0);
5096 	radeon_ring_write(ring, 0x1);
5097 
5098 	/* bits 0-15 are the VM contexts0-15 */
5099 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5100 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5101 				 WRITE_DATA_DST_SEL(0)));
5102 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5103 	radeon_ring_write(ring, 0);
5104 	radeon_ring_write(ring, 1 << vm_id);
5105 
5106 	/* wait for the invalidate to complete */
5107 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5108 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5109 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5110 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5111 	radeon_ring_write(ring, 0);
5112 	radeon_ring_write(ring, 0); /* ref */
5113 	radeon_ring_write(ring, 0); /* mask */
5114 	radeon_ring_write(ring, 0x20); /* poll interval */
5115 
5116 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5117 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5118 	radeon_ring_write(ring, 0x0);
5119 }
5120 
5121 /*
5122  *  Power and clock gating
5123  */
5124 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5125 {
5126 	int i;
5127 
5128 	for (i = 0; i < rdev->usec_timeout; i++) {
5129 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5130 			break;
5131 		udelay(1);
5132 	}
5133 
5134 	for (i = 0; i < rdev->usec_timeout; i++) {
5135 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5136 			break;
5137 		udelay(1);
5138 	}
5139 }
5140 
5141 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5142 					 bool enable)
5143 {
5144 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5145 	u32 mask;
5146 	int i;
5147 
5148 	if (enable)
5149 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5150 	else
5151 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5152 	WREG32(CP_INT_CNTL_RING0, tmp);
5153 
5154 	if (!enable) {
5155 		/* read a gfx register */
5156 		tmp = RREG32(DB_DEPTH_INFO);
5157 
5158 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5159 		for (i = 0; i < rdev->usec_timeout; i++) {
5160 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5161 				break;
5162 			udelay(1);
5163 		}
5164 	}
5165 }
5166 
5167 static void si_set_uvd_dcm(struct radeon_device *rdev,
5168 			   bool sw_mode)
5169 {
5170 	u32 tmp, tmp2;
5171 
5172 	tmp = RREG32(UVD_CGC_CTRL);
5173 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5174 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5175 
5176 	if (sw_mode) {
5177 		tmp &= ~0x7ffff800;
5178 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5179 	} else {
5180 		tmp |= 0x7ffff800;
5181 		tmp2 = 0;
5182 	}
5183 
5184 	WREG32(UVD_CGC_CTRL, tmp);
5185 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5186 }
5187 
5188 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5189 {
5190 	bool hw_mode = true;
5191 
5192 	if (hw_mode) {
5193 		si_set_uvd_dcm(rdev, false);
5194 	} else {
5195 		u32 tmp = RREG32(UVD_CGC_CTRL);
5196 		tmp &= ~DCM;
5197 		WREG32(UVD_CGC_CTRL, tmp);
5198 	}
5199 }
5200 
5201 static u32 si_halt_rlc(struct radeon_device *rdev)
5202 {
5203 	u32 data, orig;
5204 
5205 	orig = data = RREG32(RLC_CNTL);
5206 
5207 	if (data & RLC_ENABLE) {
5208 		data &= ~RLC_ENABLE;
5209 		WREG32(RLC_CNTL, data);
5210 
5211 		si_wait_for_rlc_serdes(rdev);
5212 	}
5213 
5214 	return orig;
5215 }
5216 
5217 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5218 {
5219 	u32 tmp;
5220 
5221 	tmp = RREG32(RLC_CNTL);
5222 	if (tmp != rlc)
5223 		WREG32(RLC_CNTL, rlc);
5224 }
5225 
5226 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5227 {
5228 	u32 data, orig;
5229 
5230 	orig = data = RREG32(DMA_PG);
5231 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5232 		data |= PG_CNTL_ENABLE;
5233 	else
5234 		data &= ~PG_CNTL_ENABLE;
5235 	if (orig != data)
5236 		WREG32(DMA_PG, data);
5237 }
5238 
5239 static void si_init_dma_pg(struct radeon_device *rdev)
5240 {
5241 	u32 tmp;
5242 
5243 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5244 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5245 
5246 	for (tmp = 0; tmp < 5; tmp++)
5247 		WREG32(DMA_PGFSM_WRITE, 0);
5248 }
5249 
5250 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5251 			       bool enable)
5252 {
5253 	u32 tmp;
5254 
5255 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5256 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5257 		WREG32(RLC_TTOP_D, tmp);
5258 
5259 		tmp = RREG32(RLC_PG_CNTL);
5260 		tmp |= GFX_PG_ENABLE;
5261 		WREG32(RLC_PG_CNTL, tmp);
5262 
5263 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5264 		tmp |= AUTO_PG_EN;
5265 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5266 	} else {
5267 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5268 		tmp &= ~AUTO_PG_EN;
5269 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5270 
5271 		tmp = RREG32(DB_RENDER_CONTROL);
5272 	}
5273 }
5274 
5275 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5276 {
5277 	u32 tmp;
5278 
5279 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5280 
5281 	tmp = RREG32(RLC_PG_CNTL);
5282 	tmp |= GFX_PG_SRC;
5283 	WREG32(RLC_PG_CNTL, tmp);
5284 
5285 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5286 
5287 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5288 
5289 	tmp &= ~GRBM_REG_SGIT_MASK;
5290 	tmp |= GRBM_REG_SGIT(0x700);
5291 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5292 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5293 }
5294 
5295 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5296 {
5297 	u32 mask = 0, tmp, tmp1;
5298 	int i;
5299 
5300 	si_select_se_sh(rdev, se, sh);
5301 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5302 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5303 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5304 
5305 	tmp &= 0xffff0000;
5306 
5307 	tmp |= tmp1;
5308 	tmp >>= 16;
5309 
5310 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5311 		mask <<= 1;
5312 		mask |= 1;
5313 	}
5314 
5315 	return (~tmp) & mask;
5316 }
5317 
5318 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5319 {
5320 	u32 i, j, k, active_cu_number = 0;
5321 	u32 mask, counter, cu_bitmap;
5322 	u32 tmp = 0;
5323 
5324 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5325 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5326 			mask = 1;
5327 			cu_bitmap = 0;
5328 			counter  = 0;
5329 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5330 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5331 					if (counter < 2)
5332 						cu_bitmap |= mask;
5333 					counter++;
5334 				}
5335 				mask <<= 1;
5336 			}
5337 
5338 			active_cu_number += counter;
5339 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5340 		}
5341 	}
5342 
5343 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5344 
5345 	tmp = RREG32(RLC_MAX_PG_CU);
5346 	tmp &= ~MAX_PU_CU_MASK;
5347 	tmp |= MAX_PU_CU(active_cu_number);
5348 	WREG32(RLC_MAX_PG_CU, tmp);
5349 }
5350 
5351 static void si_enable_cgcg(struct radeon_device *rdev,
5352 			   bool enable)
5353 {
5354 	u32 data, orig, tmp;
5355 
5356 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5357 
5358 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5359 		si_enable_gui_idle_interrupt(rdev, true);
5360 
5361 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5362 
5363 		tmp = si_halt_rlc(rdev);
5364 
5365 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5366 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5367 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5368 
5369 		si_wait_for_rlc_serdes(rdev);
5370 
5371 		si_update_rlc(rdev, tmp);
5372 
5373 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5374 
5375 		data |= CGCG_EN | CGLS_EN;
5376 	} else {
5377 		si_enable_gui_idle_interrupt(rdev, false);
5378 
5379 		RREG32(CB_CGTT_SCLK_CTRL);
5380 		RREG32(CB_CGTT_SCLK_CTRL);
5381 		RREG32(CB_CGTT_SCLK_CTRL);
5382 		RREG32(CB_CGTT_SCLK_CTRL);
5383 
5384 		data &= ~(CGCG_EN | CGLS_EN);
5385 	}
5386 
5387 	if (orig != data)
5388 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5389 }
5390 
5391 static void si_enable_mgcg(struct radeon_device *rdev,
5392 			   bool enable)
5393 {
5394 	u32 data, orig, tmp = 0;
5395 
5396 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5397 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5398 		data = 0x96940200;
5399 		if (orig != data)
5400 			WREG32(CGTS_SM_CTRL_REG, data);
5401 
5402 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5403 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5404 			data |= CP_MEM_LS_EN;
5405 			if (orig != data)
5406 				WREG32(CP_MEM_SLP_CNTL, data);
5407 		}
5408 
5409 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5410 		data &= 0xffffffc0;
5411 		if (orig != data)
5412 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5413 
5414 		tmp = si_halt_rlc(rdev);
5415 
5416 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5417 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5418 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5419 
5420 		si_update_rlc(rdev, tmp);
5421 	} else {
5422 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5423 		data |= 0x00000003;
5424 		if (orig != data)
5425 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5426 
5427 		data = RREG32(CP_MEM_SLP_CNTL);
5428 		if (data & CP_MEM_LS_EN) {
5429 			data &= ~CP_MEM_LS_EN;
5430 			WREG32(CP_MEM_SLP_CNTL, data);
5431 		}
5432 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5433 		data |= LS_OVERRIDE | OVERRIDE;
5434 		if (orig != data)
5435 			WREG32(CGTS_SM_CTRL_REG, data);
5436 
5437 		tmp = si_halt_rlc(rdev);
5438 
5439 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5440 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5441 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5442 
5443 		si_update_rlc(rdev, tmp);
5444 	}
5445 }
5446 
5447 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5448 			       bool enable)
5449 {
5450 	u32 orig, data, tmp;
5451 
5452 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5453 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5454 		tmp |= 0x3fff;
5455 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5456 
5457 		orig = data = RREG32(UVD_CGC_CTRL);
5458 		data |= DCM;
5459 		if (orig != data)
5460 			WREG32(UVD_CGC_CTRL, data);
5461 
5462 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5463 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5464 	} else {
5465 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5466 		tmp &= ~0x3fff;
5467 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5468 
5469 		orig = data = RREG32(UVD_CGC_CTRL);
5470 		data &= ~DCM;
5471 		if (orig != data)
5472 			WREG32(UVD_CGC_CTRL, data);
5473 
5474 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5475 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5476 	}
5477 }
5478 
5479 static const u32 mc_cg_registers[] =
5480 {
5481 	MC_HUB_MISC_HUB_CG,
5482 	MC_HUB_MISC_SIP_CG,
5483 	MC_HUB_MISC_VM_CG,
5484 	MC_XPB_CLK_GAT,
5485 	ATC_MISC_CG,
5486 	MC_CITF_MISC_WR_CG,
5487 	MC_CITF_MISC_RD_CG,
5488 	MC_CITF_MISC_VM_CG,
5489 	VM_L2_CG,
5490 };
5491 
5492 static void si_enable_mc_ls(struct radeon_device *rdev,
5493 			    bool enable)
5494 {
5495 	int i;
5496 	u32 orig, data;
5497 
5498 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5499 		orig = data = RREG32(mc_cg_registers[i]);
5500 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5501 			data |= MC_LS_ENABLE;
5502 		else
5503 			data &= ~MC_LS_ENABLE;
5504 		if (data != orig)
5505 			WREG32(mc_cg_registers[i], data);
5506 	}
5507 }
5508 
5509 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5510 			       bool enable)
5511 {
5512 	int i;
5513 	u32 orig, data;
5514 
5515 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5516 		orig = data = RREG32(mc_cg_registers[i]);
5517 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5518 			data |= MC_CG_ENABLE;
5519 		else
5520 			data &= ~MC_CG_ENABLE;
5521 		if (data != orig)
5522 			WREG32(mc_cg_registers[i], data);
5523 	}
5524 }
5525 
5526 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5527 			       bool enable)
5528 {
5529 	u32 orig, data, offset;
5530 	int i;
5531 
5532 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5533 		for (i = 0; i < 2; i++) {
5534 			if (i == 0)
5535 				offset = DMA0_REGISTER_OFFSET;
5536 			else
5537 				offset = DMA1_REGISTER_OFFSET;
5538 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5539 			data &= ~MEM_POWER_OVERRIDE;
5540 			if (data != orig)
5541 				WREG32(DMA_POWER_CNTL + offset, data);
5542 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5543 		}
5544 	} else {
5545 		for (i = 0; i < 2; i++) {
5546 			if (i == 0)
5547 				offset = DMA0_REGISTER_OFFSET;
5548 			else
5549 				offset = DMA1_REGISTER_OFFSET;
5550 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5551 			data |= MEM_POWER_OVERRIDE;
5552 			if (data != orig)
5553 				WREG32(DMA_POWER_CNTL + offset, data);
5554 
5555 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5556 			data = 0xff000000;
5557 			if (data != orig)
5558 				WREG32(DMA_CLK_CTRL + offset, data);
5559 		}
5560 	}
5561 }
5562 
5563 static void si_enable_bif_mgls(struct radeon_device *rdev,
5564 			       bool enable)
5565 {
5566 	u32 orig, data;
5567 
5568 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5569 
5570 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5571 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5572 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5573 	else
5574 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5575 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5576 
5577 	if (orig != data)
5578 		WREG32_PCIE(PCIE_CNTL2, data);
5579 }
5580 
5581 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5582 			       bool enable)
5583 {
5584 	u32 orig, data;
5585 
5586 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5587 
5588 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5589 		data &= ~CLOCK_GATING_DIS;
5590 	else
5591 		data |= CLOCK_GATING_DIS;
5592 
5593 	if (orig != data)
5594 		WREG32(HDP_HOST_PATH_CNTL, data);
5595 }
5596 
5597 static void si_enable_hdp_ls(struct radeon_device *rdev,
5598 			     bool enable)
5599 {
5600 	u32 orig, data;
5601 
5602 	orig = data = RREG32(HDP_MEM_POWER_LS);
5603 
5604 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5605 		data |= HDP_LS_ENABLE;
5606 	else
5607 		data &= ~HDP_LS_ENABLE;
5608 
5609 	if (orig != data)
5610 		WREG32(HDP_MEM_POWER_LS, data);
5611 }
5612 
5613 static void si_update_cg(struct radeon_device *rdev,
5614 			 u32 block, bool enable)
5615 {
5616 	if (block & RADEON_CG_BLOCK_GFX) {
5617 		si_enable_gui_idle_interrupt(rdev, false);
5618 		/* order matters! */
5619 		if (enable) {
5620 			si_enable_mgcg(rdev, true);
5621 			si_enable_cgcg(rdev, true);
5622 		} else {
5623 			si_enable_cgcg(rdev, false);
5624 			si_enable_mgcg(rdev, false);
5625 		}
5626 		si_enable_gui_idle_interrupt(rdev, true);
5627 	}
5628 
5629 	if (block & RADEON_CG_BLOCK_MC) {
5630 		si_enable_mc_mgcg(rdev, enable);
5631 		si_enable_mc_ls(rdev, enable);
5632 	}
5633 
5634 	if (block & RADEON_CG_BLOCK_SDMA) {
5635 		si_enable_dma_mgcg(rdev, enable);
5636 	}
5637 
5638 	if (block & RADEON_CG_BLOCK_BIF) {
5639 		si_enable_bif_mgls(rdev, enable);
5640 	}
5641 
5642 	if (block & RADEON_CG_BLOCK_UVD) {
5643 		if (rdev->has_uvd) {
5644 			si_enable_uvd_mgcg(rdev, enable);
5645 		}
5646 	}
5647 
5648 	if (block & RADEON_CG_BLOCK_HDP) {
5649 		si_enable_hdp_mgcg(rdev, enable);
5650 		si_enable_hdp_ls(rdev, enable);
5651 	}
5652 }
5653 
5654 static void si_init_cg(struct radeon_device *rdev)
5655 {
5656 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5657 			    RADEON_CG_BLOCK_MC |
5658 			    RADEON_CG_BLOCK_SDMA |
5659 			    RADEON_CG_BLOCK_BIF |
5660 			    RADEON_CG_BLOCK_HDP), true);
5661 	if (rdev->has_uvd) {
5662 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5663 		si_init_uvd_internal_cg(rdev);
5664 	}
5665 }
5666 
5667 static void si_fini_cg(struct radeon_device *rdev)
5668 {
5669 	if (rdev->has_uvd) {
5670 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5671 	}
5672 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5673 			    RADEON_CG_BLOCK_MC |
5674 			    RADEON_CG_BLOCK_SDMA |
5675 			    RADEON_CG_BLOCK_BIF |
5676 			    RADEON_CG_BLOCK_HDP), false);
5677 }
5678 
5679 u32 si_get_csb_size(struct radeon_device *rdev)
5680 {
5681 	u32 count = 0;
5682 	const struct cs_section_def *sect = NULL;
5683 	const struct cs_extent_def *ext = NULL;
5684 
5685 	if (rdev->rlc.cs_data == NULL)
5686 		return 0;
5687 
5688 	/* begin clear state */
5689 	count += 2;
5690 	/* context control state */
5691 	count += 3;
5692 
5693 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5694 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5695 			if (sect->id == SECT_CONTEXT)
5696 				count += 2 + ext->reg_count;
5697 			else
5698 				return 0;
5699 		}
5700 	}
5701 	/* pa_sc_raster_config */
5702 	count += 3;
5703 	/* end clear state */
5704 	count += 2;
5705 	/* clear state */
5706 	count += 2;
5707 
5708 	return count;
5709 }
5710 
5711 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5712 {
5713 	u32 count = 0, i;
5714 	const struct cs_section_def *sect = NULL;
5715 	const struct cs_extent_def *ext = NULL;
5716 
5717 	if (rdev->rlc.cs_data == NULL)
5718 		return;
5719 	if (buffer == NULL)
5720 		return;
5721 
5722 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5723 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5724 
5725 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5726 	buffer[count++] = cpu_to_le32(0x80000000);
5727 	buffer[count++] = cpu_to_le32(0x80000000);
5728 
5729 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5730 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5731 			if (sect->id == SECT_CONTEXT) {
5732 				buffer[count++] =
5733 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5734 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5735 				for (i = 0; i < ext->reg_count; i++)
5736 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5737 			} else {
5738 				return;
5739 			}
5740 		}
5741 	}
5742 
5743 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5744 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5745 	switch (rdev->family) {
5746 	case CHIP_TAHITI:
5747 	case CHIP_PITCAIRN:
5748 		buffer[count++] = cpu_to_le32(0x2a00126a);
5749 		break;
5750 	case CHIP_VERDE:
5751 		buffer[count++] = cpu_to_le32(0x0000124a);
5752 		break;
5753 	case CHIP_OLAND:
5754 		buffer[count++] = cpu_to_le32(0x00000082);
5755 		break;
5756 	case CHIP_HAINAN:
5757 		buffer[count++] = cpu_to_le32(0x00000000);
5758 		break;
5759 	default:
5760 		buffer[count++] = cpu_to_le32(0x00000000);
5761 		break;
5762 	}
5763 
5764 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5765 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5766 
5767 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5768 	buffer[count++] = cpu_to_le32(0);
5769 }
5770 
5771 static void si_init_pg(struct radeon_device *rdev)
5772 {
5773 	if (rdev->pg_flags) {
5774 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5775 			si_init_dma_pg(rdev);
5776 		}
5777 		si_init_ao_cu_mask(rdev);
5778 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5779 			si_init_gfx_cgpg(rdev);
5780 		} else {
5781 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5782 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5783 		}
5784 		si_enable_dma_pg(rdev, true);
5785 		si_enable_gfx_cgpg(rdev, true);
5786 	} else {
5787 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5788 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5789 	}
5790 }
5791 
5792 static void si_fini_pg(struct radeon_device *rdev)
5793 {
5794 	if (rdev->pg_flags) {
5795 		si_enable_dma_pg(rdev, false);
5796 		si_enable_gfx_cgpg(rdev, false);
5797 	}
5798 }
5799 
5800 /*
5801  * RLC
5802  */
5803 void si_rlc_reset(struct radeon_device *rdev)
5804 {
5805 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5806 
5807 	tmp |= SOFT_RESET_RLC;
5808 	WREG32(GRBM_SOFT_RESET, tmp);
5809 	udelay(50);
5810 	tmp &= ~SOFT_RESET_RLC;
5811 	WREG32(GRBM_SOFT_RESET, tmp);
5812 	udelay(50);
5813 }
5814 
5815 static void si_rlc_stop(struct radeon_device *rdev)
5816 {
5817 	WREG32(RLC_CNTL, 0);
5818 
5819 	si_enable_gui_idle_interrupt(rdev, false);
5820 
5821 	si_wait_for_rlc_serdes(rdev);
5822 }
5823 
5824 static void si_rlc_start(struct radeon_device *rdev)
5825 {
5826 	WREG32(RLC_CNTL, RLC_ENABLE);
5827 
5828 	si_enable_gui_idle_interrupt(rdev, true);
5829 
5830 	udelay(50);
5831 }
5832 
5833 static bool si_lbpw_supported(struct radeon_device *rdev)
5834 {
5835 	u32 tmp;
5836 
5837 	/* Enable LBPW only for DDR3 */
5838 	tmp = RREG32(MC_SEQ_MISC0);
5839 	if ((tmp & 0xF0000000) == 0xB0000000)
5840 		return true;
5841 	return false;
5842 }
5843 
5844 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5845 {
5846 	u32 tmp;
5847 
5848 	tmp = RREG32(RLC_LB_CNTL);
5849 	if (enable)
5850 		tmp |= LOAD_BALANCE_ENABLE;
5851 	else
5852 		tmp &= ~LOAD_BALANCE_ENABLE;
5853 	WREG32(RLC_LB_CNTL, tmp);
5854 
5855 	if (!enable) {
5856 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5857 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5858 	}
5859 }
5860 
5861 static int si_rlc_resume(struct radeon_device *rdev)
5862 {
5863 	u32 i;
5864 
5865 	if (!rdev->rlc_fw)
5866 		return -EINVAL;
5867 
5868 	si_rlc_stop(rdev);
5869 
5870 	si_rlc_reset(rdev);
5871 
5872 	si_init_pg(rdev);
5873 
5874 	si_init_cg(rdev);
5875 
5876 	WREG32(RLC_RL_BASE, 0);
5877 	WREG32(RLC_RL_SIZE, 0);
5878 	WREG32(RLC_LB_CNTL, 0);
5879 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5880 	WREG32(RLC_LB_CNTR_INIT, 0);
5881 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5882 
5883 	WREG32(RLC_MC_CNTL, 0);
5884 	WREG32(RLC_UCODE_CNTL, 0);
5885 
5886 	if (rdev->new_fw) {
5887 		const struct rlc_firmware_header_v1_0 *hdr =
5888 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5889 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5890 		const __le32 *fw_data = (const __le32 *)
5891 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5892 
5893 		radeon_ucode_print_rlc_hdr(&hdr->header);
5894 
5895 		for (i = 0; i < fw_size; i++) {
5896 			WREG32(RLC_UCODE_ADDR, i);
5897 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5898 		}
5899 	} else {
5900 		const __be32 *fw_data =
5901 			(const __be32 *)rdev->rlc_fw->data;
5902 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5903 			WREG32(RLC_UCODE_ADDR, i);
5904 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5905 		}
5906 	}
5907 	WREG32(RLC_UCODE_ADDR, 0);
5908 
5909 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5910 
5911 	si_rlc_start(rdev);
5912 
5913 	return 0;
5914 }
5915 
5916 static void si_enable_interrupts(struct radeon_device *rdev)
5917 {
5918 	u32 ih_cntl = RREG32(IH_CNTL);
5919 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5920 
5921 	ih_cntl |= ENABLE_INTR;
5922 	ih_rb_cntl |= IH_RB_ENABLE;
5923 	WREG32(IH_CNTL, ih_cntl);
5924 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5925 	rdev->ih.enabled = true;
5926 }
5927 
5928 static void si_disable_interrupts(struct radeon_device *rdev)
5929 {
5930 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5931 	u32 ih_cntl = RREG32(IH_CNTL);
5932 
5933 	ih_rb_cntl &= ~IH_RB_ENABLE;
5934 	ih_cntl &= ~ENABLE_INTR;
5935 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5936 	WREG32(IH_CNTL, ih_cntl);
5937 	/* set rptr, wptr to 0 */
5938 	WREG32(IH_RB_RPTR, 0);
5939 	WREG32(IH_RB_WPTR, 0);
5940 	rdev->ih.enabled = false;
5941 	rdev->ih.rptr = 0;
5942 }
5943 
5944 static void si_disable_interrupt_state(struct radeon_device *rdev)
5945 {
5946 	int i;
5947 	u32 tmp;
5948 
5949 	tmp = RREG32(CP_INT_CNTL_RING0) &
5950 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5951 	WREG32(CP_INT_CNTL_RING0, tmp);
5952 	WREG32(CP_INT_CNTL_RING1, 0);
5953 	WREG32(CP_INT_CNTL_RING2, 0);
5954 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5955 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5956 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5957 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5958 	WREG32(GRBM_INT_CNTL, 0);
5959 	WREG32(SRBM_INT_CNTL, 0);
5960 	for (i = 0; i < rdev->num_crtc; i++)
5961 		WREG32(INT_MASK + crtc_offsets[i], 0);
5962 	for (i = 0; i < rdev->num_crtc; i++)
5963 		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], 0);
5964 
5965 	if (!ASIC_IS_NODCE(rdev)) {
5966 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5967 
5968 		for (i = 0; i < 6; i++)
5969 			WREG32_AND(DC_HPDx_INT_CONTROL(i),
5970 				   DC_HPDx_INT_POLARITY);
5971 	}
5972 }
5973 
5974 static int si_irq_init(struct radeon_device *rdev)
5975 {
5976 	int ret = 0;
5977 	int rb_bufsz;
5978 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5979 
5980 	/* allocate ring */
5981 	ret = r600_ih_ring_alloc(rdev);
5982 	if (ret)
5983 		return ret;
5984 
5985 	/* disable irqs */
5986 	si_disable_interrupts(rdev);
5987 
5988 	/* init rlc */
5989 	ret = si_rlc_resume(rdev);
5990 	if (ret) {
5991 		r600_ih_ring_fini(rdev);
5992 		return ret;
5993 	}
5994 
5995 	/* setup interrupt control */
5996 	/* set dummy read address to ring address */
5997 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5998 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
5999 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6000 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6001 	 */
6002 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6003 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6004 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6005 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6006 
6007 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6008 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6009 
6010 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6011 		      IH_WPTR_OVERFLOW_CLEAR |
6012 		      (rb_bufsz << 1));
6013 
6014 	if (rdev->wb.enabled)
6015 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6016 
6017 	/* set the writeback address whether it's enabled or not */
6018 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6019 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6020 
6021 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6022 
6023 	/* set rptr, wptr to 0 */
6024 	WREG32(IH_RB_RPTR, 0);
6025 	WREG32(IH_RB_WPTR, 0);
6026 
6027 	/* Default settings for IH_CNTL (disabled at first) */
6028 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6029 	/* RPTR_REARM only works if msi's are enabled */
6030 	if (rdev->msi_enabled)
6031 		ih_cntl |= RPTR_REARM;
6032 	WREG32(IH_CNTL, ih_cntl);
6033 
6034 	/* force the active interrupt state to all disabled */
6035 	si_disable_interrupt_state(rdev);
6036 
6037 	pci_set_master(rdev->pdev);
6038 
6039 	/* enable irqs */
6040 	si_enable_interrupts(rdev);
6041 
6042 	return ret;
6043 }
6044 
6045 /* The order we write back each register here is important */
6046 int si_irq_set(struct radeon_device *rdev)
6047 {
6048 	int i;
6049 	u32 cp_int_cntl;
6050 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6051 	u32 grbm_int_cntl = 0;
6052 	u32 dma_cntl, dma_cntl1;
6053 	u32 thermal_int = 0;
6054 
6055 	if (!rdev->irq.installed) {
6056 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6057 		return -EINVAL;
6058 	}
6059 	/* don't enable anything if the ih is disabled */
6060 	if (!rdev->ih.enabled) {
6061 		si_disable_interrupts(rdev);
6062 		/* force the active interrupt state to all disabled */
6063 		si_disable_interrupt_state(rdev);
6064 		return 0;
6065 	}
6066 
6067 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6068 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6069 
6070 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6071 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6072 
6073 	thermal_int = RREG32(CG_THERMAL_INT) &
6074 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6075 
6076 	/* enable CP interrupts on all rings */
6077 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6078 		DRM_DEBUG("si_irq_set: sw int gfx\n");
6079 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6080 	}
6081 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6082 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6083 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6084 	}
6085 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6086 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6087 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6088 	}
6089 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6090 		DRM_DEBUG("si_irq_set: sw int dma\n");
6091 		dma_cntl |= TRAP_ENABLE;
6092 	}
6093 
6094 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6095 		DRM_DEBUG("si_irq_set: sw int dma1\n");
6096 		dma_cntl1 |= TRAP_ENABLE;
6097 	}
6098 
6099 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6100 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6101 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6102 
6103 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6104 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6105 
6106 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6107 
6108 	if (rdev->irq.dpm_thermal) {
6109 		DRM_DEBUG("dpm thermal\n");
6110 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6111 	}
6112 
6113 	for (i = 0; i < rdev->num_crtc; i++) {
6114 		radeon_irq_kms_set_irq_n_enabled(
6115 		    rdev, INT_MASK + crtc_offsets[i], VBLANK_INT_MASK,
6116 		    rdev->irq.crtc_vblank_int[i] ||
6117 		    atomic_read(&rdev->irq.pflip[i]), "vblank", i);
6118 	}
6119 
6120 	for (i = 0; i < rdev->num_crtc; i++)
6121 		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], GRPH_PFLIP_INT_MASK);
6122 
6123 	if (!ASIC_IS_NODCE(rdev)) {
6124 		for (i = 0; i < 6; i++) {
6125 			radeon_irq_kms_set_irq_n_enabled(
6126 			    rdev, DC_HPDx_INT_CONTROL(i),
6127 			    DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN,
6128 			    rdev->irq.hpd[i], "HPD", i);
6129 		}
6130 	}
6131 
6132 	WREG32(CG_THERMAL_INT, thermal_int);
6133 
6134 	/* posting read */
6135 	RREG32(SRBM_STATUS);
6136 
6137 	return 0;
6138 }
6139 
6140 /* The order we write back each register here is important */
6141 static inline void si_irq_ack(struct radeon_device *rdev)
6142 {
6143 	int i, j;
6144 	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6145 	u32 *grph_int = rdev->irq.stat_regs.evergreen.grph_int;
6146 
6147 	if (ASIC_IS_NODCE(rdev))
6148 		return;
6149 
6150 	for (i = 0; i < 6; i++) {
6151 		disp_int[i] = RREG32(si_disp_int_status[i]);
6152 		if (i < rdev->num_crtc)
6153 			grph_int[i] = RREG32(GRPH_INT_STATUS + crtc_offsets[i]);
6154 	}
6155 
6156 	/* We write back each interrupt register in pairs of two */
6157 	for (i = 0; i < rdev->num_crtc; i += 2) {
6158 		for (j = i; j < (i + 2); j++) {
6159 			if (grph_int[j] & GRPH_PFLIP_INT_OCCURRED)
6160 				WREG32(GRPH_INT_STATUS + crtc_offsets[j],
6161 				       GRPH_PFLIP_INT_CLEAR);
6162 		}
6163 
6164 		for (j = i; j < (i + 2); j++) {
6165 			if (disp_int[j] & LB_D1_VBLANK_INTERRUPT)
6166 				WREG32(VBLANK_STATUS + crtc_offsets[j],
6167 				       VBLANK_ACK);
6168 			if (disp_int[j] & LB_D1_VLINE_INTERRUPT)
6169 				WREG32(VLINE_STATUS + crtc_offsets[j],
6170 				       VLINE_ACK);
6171 		}
6172 	}
6173 
6174 	for (i = 0; i < 6; i++) {
6175 		if (disp_int[i] & DC_HPD1_INTERRUPT)
6176 			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_INT_ACK);
6177 	}
6178 
6179 	for (i = 0; i < 6; i++) {
6180 		if (disp_int[i] & DC_HPD1_RX_INTERRUPT)
6181 			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_RX_INT_ACK);
6182 	}
6183 }
6184 
/**
 * si_irq_disable - disable interrupts and flush what is pending
 *
 * @rdev: radeon_device pointer
 *
 * Disables the IH ring, waits 1ms, acknowledges the display interrupts
 * and finally masks every interrupt source.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);

	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);

	si_disable_interrupt_state(rdev);
}
6193 
/**
 * si_irq_suspend - quiesce interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disables interrupt delivery, then stops the RLC.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6199 
/**
 * si_irq_fini - tear down interrupt handling
 *
 * @rdev: radeon_device pointer
 *
 * Suspends interrupt handling and then frees the IH ring.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6205 
6206 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6207 {
6208 	u32 wptr, tmp;
6209 
6210 	if (rdev->wb.enabled)
6211 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6212 	else
6213 		wptr = RREG32(IH_RB_WPTR);
6214 
6215 	if (wptr & RB_OVERFLOW) {
6216 		wptr &= ~RB_OVERFLOW;
6217 		/* When a ring buffer overflow happen start parsing interrupt
6218 		 * from the last not overwritten vector (wptr + 16). Hopefully
6219 		 * this should allow us to catchup.
6220 		 */
6221 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6222 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6223 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6224 		tmp = RREG32(IH_RB_CNTL);
6225 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6226 		WREG32(IH_RB_CNTL, tmp);
6227 	}
6228 	return (wptr & rdev->ih.ptr_mask);
6229 }
6230 
6231 /*        SI IV Ring
6232  * Each IV ring entry is 128 bits:
6233  * [7:0]    - interrupt source id
6234  * [31:8]   - reserved
6235  * [59:32]  - interrupt source data
6236  * [63:60]  - reserved
6237  * [71:64]  - RINGID
6238  * [79:72]  - VMID
6239  * [127:80] - reserved
6240  */
6241 int si_irq_process(struct radeon_device *rdev)
6242 {
6243 	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6244 	u32 crtc_idx, hpd_idx;
6245 	u32 mask;
6246 	u32 wptr;
6247 	u32 rptr;
6248 	u32 src_id, src_data, ring_id;
6249 	u32 ring_index;
6250 	bool queue_hotplug = false;
6251 	bool queue_dp = false;
6252 	bool queue_thermal = false;
6253 	u32 status, addr;
6254 	const char *event_name;
6255 
6256 	if (!rdev->ih.enabled || rdev->shutdown)
6257 		return IRQ_NONE;
6258 
6259 	wptr = si_get_ih_wptr(rdev);
6260 
6261 restart_ih:
6262 	/* is somebody else already processing irqs? */
6263 	if (atomic_xchg(&rdev->ih.lock, 1))
6264 		return IRQ_NONE;
6265 
6266 	rptr = rdev->ih.rptr;
6267 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6268 
6269 	/* Order reading of wptr vs. reading of IH ring data */
6270 	rmb();
6271 
6272 	/* display interrupts */
6273 	si_irq_ack(rdev);
6274 
6275 	while (rptr != wptr) {
6276 		/* wptr/rptr are in bytes! */
6277 		ring_index = rptr / 4;
6278 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6279 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6280 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6281 
6282 		switch (src_id) {
6283 		case 1: /* D1 vblank/vline */
6284 		case 2: /* D2 vblank/vline */
6285 		case 3: /* D3 vblank/vline */
6286 		case 4: /* D4 vblank/vline */
6287 		case 5: /* D5 vblank/vline */
6288 		case 6: /* D6 vblank/vline */
6289 			crtc_idx = src_id - 1;
6290 
6291 			if (src_data == 0) { /* vblank */
6292 				mask = LB_D1_VBLANK_INTERRUPT;
6293 				event_name = "vblank";
6294 
6295 				if (rdev->irq.crtc_vblank_int[crtc_idx]) {
6296 					drm_handle_vblank(rdev->ddev, crtc_idx);
6297 					rdev->pm.vblank_sync = true;
6298 					wake_up(&rdev->irq.vblank_queue);
6299 				}
6300 				if (atomic_read(&rdev->irq.pflip[crtc_idx])) {
6301 					radeon_crtc_handle_vblank(rdev,
6302 								  crtc_idx);
6303 				}
6304 
6305 			} else if (src_data == 1) { /* vline */
6306 				mask = LB_D1_VLINE_INTERRUPT;
6307 				event_name = "vline";
6308 			} else {
6309 				DRM_DEBUG("Unhandled interrupt: %d %d\n",
6310 					  src_id, src_data);
6311 				break;
6312 			}
6313 
6314 			if (!(disp_int[crtc_idx] & mask)) {
6315 				DRM_DEBUG("IH: D%d %s - IH event w/o asserted irq bit?\n",
6316 					  crtc_idx + 1, event_name);
6317 			}
6318 
6319 			disp_int[crtc_idx] &= ~mask;
6320 			DRM_DEBUG("IH: D%d %s\n", crtc_idx + 1, event_name);
6321 
6322 			break;
6323 		case 8: /* D1 page flip */
6324 		case 10: /* D2 page flip */
6325 		case 12: /* D3 page flip */
6326 		case 14: /* D4 page flip */
6327 		case 16: /* D5 page flip */
6328 		case 18: /* D6 page flip */
6329 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6330 			if (radeon_use_pflipirq > 0)
6331 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6332 			break;
6333 		case 42: /* HPD hotplug */
6334 			if (src_data <= 5) {
6335 				hpd_idx = src_data;
6336 				mask = DC_HPD1_INTERRUPT;
6337 				queue_hotplug = true;
6338 				event_name = "HPD";
6339 
6340 			} else if (src_data <= 11) {
6341 				hpd_idx = src_data - 6;
6342 				mask = DC_HPD1_RX_INTERRUPT;
6343 				queue_dp = true;
6344 				event_name = "HPD_RX";
6345 
6346 			} else {
6347 				DRM_DEBUG("Unhandled interrupt: %d %d\n",
6348 					  src_id, src_data);
6349 				break;
6350 			}
6351 
6352 			if (!(disp_int[hpd_idx] & mask))
6353 				DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6354 
6355 			disp_int[hpd_idx] &= ~mask;
6356 			DRM_DEBUG("IH: %s%d\n", event_name, hpd_idx + 1);
6357 			break;
6358 		case 96:
6359 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6360 			WREG32(SRBM_INT_ACK, 0x1);
6361 			break;
6362 		case 124: /* UVD */
6363 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6364 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6365 			break;
6366 		case 146:
6367 		case 147:
6368 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6369 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6370 			/* reset addr and status */
6371 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6372 			if (addr == 0x0 && status == 0x0)
6373 				break;
6374 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6375 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6376 				addr);
6377 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6378 				status);
6379 			si_vm_decode_fault(rdev, status, addr);
6380 			break;
6381 		case 176: /* RINGID0 CP_INT */
6382 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6383 			break;
6384 		case 177: /* RINGID1 CP_INT */
6385 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6386 			break;
6387 		case 178: /* RINGID2 CP_INT */
6388 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6389 			break;
6390 		case 181: /* CP EOP event */
6391 			DRM_DEBUG("IH: CP EOP\n");
6392 			switch (ring_id) {
6393 			case 0:
6394 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6395 				break;
6396 			case 1:
6397 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6398 				break;
6399 			case 2:
6400 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6401 				break;
6402 			}
6403 			break;
6404 		case 224: /* DMA trap event */
6405 			DRM_DEBUG("IH: DMA trap\n");
6406 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6407 			break;
6408 		case 230: /* thermal low to high */
6409 			DRM_DEBUG("IH: thermal low to high\n");
6410 			rdev->pm.dpm.thermal.high_to_low = false;
6411 			queue_thermal = true;
6412 			break;
6413 		case 231: /* thermal high to low */
6414 			DRM_DEBUG("IH: thermal high to low\n");
6415 			rdev->pm.dpm.thermal.high_to_low = true;
6416 			queue_thermal = true;
6417 			break;
6418 		case 233: /* GUI IDLE */
6419 			DRM_DEBUG("IH: GUI idle\n");
6420 			break;
6421 		case 244: /* DMA trap event */
6422 			DRM_DEBUG("IH: DMA1 trap\n");
6423 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6424 			break;
6425 		default:
6426 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6427 			break;
6428 		}
6429 
6430 		/* wptr/rptr are in bytes! */
6431 		rptr += 16;
6432 		rptr &= rdev->ih.ptr_mask;
6433 		WREG32(IH_RB_RPTR, rptr);
6434 	}
6435 	if (queue_dp)
6436 		schedule_work(&rdev->dp_work);
6437 	if (queue_hotplug)
6438 		schedule_delayed_work(&rdev->hotplug_work, 0);
6439 	if (queue_thermal && rdev->pm.dpm_enabled)
6440 		schedule_work(&rdev->pm.dpm.thermal.work);
6441 	rdev->ih.rptr = rptr;
6442 	atomic_set(&rdev->ih.lock, 0);
6443 
6444 	/* make sure wptr hasn't changed while processing */
6445 	wptr = si_get_ih_wptr(rdev);
6446 	if (wptr != rptr)
6447 		goto restart_ih;
6448 
6449 	return IRQ_HANDLED;
6450 }
6451 
6452 /*
6453  * startup/shutdown callbacks
6454  */
6455 static void si_uvd_init(struct radeon_device *rdev)
6456 {
6457 	int r;
6458 
6459 	if (!rdev->has_uvd)
6460 		return;
6461 
6462 	r = radeon_uvd_init(rdev);
6463 	if (r) {
6464 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6465 		/*
6466 		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
6467 		 * to early fails uvd_v2_2_resume() and thus nothing happens
6468 		 * there. So it is pointless to try to go through that code
6469 		 * hence why we disable uvd here.
6470 		 */
6471 		rdev->has_uvd = 0;
6472 		return;
6473 	}
6474 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6475 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6476 }
6477 
6478 static void si_uvd_start(struct radeon_device *rdev)
6479 {
6480 	int r;
6481 
6482 	if (!rdev->has_uvd)
6483 		return;
6484 
6485 	r = uvd_v2_2_resume(rdev);
6486 	if (r) {
6487 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6488 		goto error;
6489 	}
6490 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6491 	if (r) {
6492 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6493 		goto error;
6494 	}
6495 	return;
6496 
6497 error:
6498 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6499 }
6500 
6501 static void si_uvd_resume(struct radeon_device *rdev)
6502 {
6503 	struct radeon_ring *ring;
6504 	int r;
6505 
6506 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6507 		return;
6508 
6509 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6510 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6511 	if (r) {
6512 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6513 		return;
6514 	}
6515 	r = uvd_v1_0_init(rdev);
6516 	if (r) {
6517 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6518 		return;
6519 	}
6520 }
6521 
6522 static void si_vce_init(struct radeon_device *rdev)
6523 {
6524 	int r;
6525 
6526 	if (!rdev->has_vce)
6527 		return;
6528 
6529 	r = radeon_vce_init(rdev);
6530 	if (r) {
6531 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6532 		/*
6533 		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
6534 		 * to early fails si_vce_start() and thus nothing happens
6535 		 * there. So it is pointless to try to go through that code
6536 		 * hence why we disable vce here.
6537 		 */
6538 		rdev->has_vce = 0;
6539 		return;
6540 	}
6541 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6542 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6543 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6544 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6545 }
6546 
6547 static void si_vce_start(struct radeon_device *rdev)
6548 {
6549 	int r;
6550 
6551 	if (!rdev->has_vce)
6552 		return;
6553 
6554 	r = radeon_vce_resume(rdev);
6555 	if (r) {
6556 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6557 		goto error;
6558 	}
6559 	r = vce_v1_0_resume(rdev);
6560 	if (r) {
6561 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6562 		goto error;
6563 	}
6564 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6565 	if (r) {
6566 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6567 		goto error;
6568 	}
6569 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6570 	if (r) {
6571 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6572 		goto error;
6573 	}
6574 	return;
6575 
6576 error:
6577 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6578 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6579 }
6580 
6581 static void si_vce_resume(struct radeon_device *rdev)
6582 {
6583 	struct radeon_ring *ring;
6584 	int r;
6585 
6586 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6587 		return;
6588 
6589 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6590 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6591 	if (r) {
6592 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6593 		return;
6594 	}
6595 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6596 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6597 	if (r) {
6598 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6599 		return;
6600 	}
6601 	r = vce_v1_0_init(rdev);
6602 	if (r) {
6603 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6604 		return;
6605 	}
6606 }
6607 
6608 static int si_startup(struct radeon_device *rdev)
6609 {
6610 	struct radeon_ring *ring;
6611 	int r;
6612 
6613 	/* enable pcie gen2/3 link */
6614 	si_pcie_gen3_enable(rdev);
6615 	/* enable aspm */
6616 	si_program_aspm(rdev);
6617 
6618 	/* scratch needs to be initialized before MC */
6619 	r = r600_vram_scratch_init(rdev);
6620 	if (r)
6621 		return r;
6622 
6623 	si_mc_program(rdev);
6624 
6625 	if (!rdev->pm.dpm_enabled) {
6626 		r = si_mc_load_microcode(rdev);
6627 		if (r) {
6628 			DRM_ERROR("Failed to load MC firmware!\n");
6629 			return r;
6630 		}
6631 	}
6632 
6633 	r = si_pcie_gart_enable(rdev);
6634 	if (r)
6635 		return r;
6636 	si_gpu_init(rdev);
6637 
6638 	/* allocate rlc buffers */
6639 	if (rdev->family == CHIP_VERDE) {
6640 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6641 		rdev->rlc.reg_list_size =
6642 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6643 	}
6644 	rdev->rlc.cs_data = si_cs_data;
6645 	r = sumo_rlc_init(rdev);
6646 	if (r) {
6647 		DRM_ERROR("Failed to init rlc BOs!\n");
6648 		return r;
6649 	}
6650 
6651 	/* allocate wb buffer */
6652 	r = radeon_wb_init(rdev);
6653 	if (r)
6654 		return r;
6655 
6656 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6657 	if (r) {
6658 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6659 		return r;
6660 	}
6661 
6662 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6663 	if (r) {
6664 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6665 		return r;
6666 	}
6667 
6668 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6669 	if (r) {
6670 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6671 		return r;
6672 	}
6673 
6674 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6675 	if (r) {
6676 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6677 		return r;
6678 	}
6679 
6680 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6681 	if (r) {
6682 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6683 		return r;
6684 	}
6685 
6686 	si_uvd_start(rdev);
6687 	si_vce_start(rdev);
6688 
6689 	/* Enable IRQ */
6690 	if (!rdev->irq.installed) {
6691 		r = radeon_irq_kms_init(rdev);
6692 		if (r)
6693 			return r;
6694 	}
6695 
6696 	r = si_irq_init(rdev);
6697 	if (r) {
6698 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
6699 		radeon_irq_kms_fini(rdev);
6700 		return r;
6701 	}
6702 	si_irq_set(rdev);
6703 
6704 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6705 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6706 			     RADEON_CP_PACKET2);
6707 	if (r)
6708 		return r;
6709 
6710 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6711 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6712 			     RADEON_CP_PACKET2);
6713 	if (r)
6714 		return r;
6715 
6716 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6717 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6718 			     RADEON_CP_PACKET2);
6719 	if (r)
6720 		return r;
6721 
6722 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6723 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6724 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6725 	if (r)
6726 		return r;
6727 
6728 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6729 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6730 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6731 	if (r)
6732 		return r;
6733 
6734 	r = si_cp_load_microcode(rdev);
6735 	if (r)
6736 		return r;
6737 	r = si_cp_resume(rdev);
6738 	if (r)
6739 		return r;
6740 
6741 	r = cayman_dma_resume(rdev);
6742 	if (r)
6743 		return r;
6744 
6745 	si_uvd_resume(rdev);
6746 	si_vce_resume(rdev);
6747 
6748 	r = radeon_ib_pool_init(rdev);
6749 	if (r) {
6750 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6751 		return r;
6752 	}
6753 
6754 	r = radeon_vm_manager_init(rdev);
6755 	if (r) {
6756 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6757 		return r;
6758 	}
6759 
6760 	r = radeon_audio_init(rdev);
6761 	if (r)
6762 		return r;
6763 
6764 	return 0;
6765 }
6766 
6767 int si_resume(struct radeon_device *rdev)
6768 {
6769 	int r;
6770 
6771 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6772 	 * posting will perform necessary task to bring back GPU into good
6773 	 * shape.
6774 	 */
6775 	/* post card */
6776 	atom_asic_init(rdev->mode_info.atom_context);
6777 
6778 	/* init golden registers */
6779 	si_init_golden_registers(rdev);
6780 
6781 	if (rdev->pm.pm_method == PM_METHOD_DPM)
6782 		radeon_pm_resume(rdev);
6783 
6784 	rdev->accel_working = true;
6785 	r = si_startup(rdev);
6786 	if (r) {
6787 		DRM_ERROR("si startup failed on resume\n");
6788 		rdev->accel_working = false;
6789 		return r;
6790 	}
6791 
6792 	return r;
6793 
6794 }
6795 
6796 int si_suspend(struct radeon_device *rdev)
6797 {
6798 	radeon_pm_suspend(rdev);
6799 	radeon_audio_fini(rdev);
6800 	radeon_vm_manager_fini(rdev);
6801 	si_cp_enable(rdev, false);
6802 	cayman_dma_stop(rdev);
6803 	if (rdev->has_uvd) {
6804 		uvd_v1_0_fini(rdev);
6805 		radeon_uvd_suspend(rdev);
6806 	}
6807 	if (rdev->has_vce)
6808 		radeon_vce_suspend(rdev);
6809 	si_fini_pg(rdev);
6810 	si_fini_cg(rdev);
6811 	si_irq_suspend(rdev);
6812 	radeon_wb_disable(rdev);
6813 	si_pcie_gart_disable(rdev);
6814 	return 0;
6815 }
6816 
6817 /* Plan is to move initialization in that function and use
6818  * helper function so that radeon_device_init pretty much
6819  * do nothing more than calling asic specific function. This
6820  * should also allow to remove a bunch of callback function
6821  * like vram_info.
6822  */
6823 int si_init(struct radeon_device *rdev)
6824 {
6825 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6826 	int r;
6827 
6828 	/* Read BIOS */
6829 	if (!radeon_get_bios(rdev)) {
6830 		if (ASIC_IS_AVIVO(rdev))
6831 			return -EINVAL;
6832 	}
6833 	/* Must be an ATOMBIOS */
6834 	if (!rdev->is_atom_bios) {
6835 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6836 		return -EINVAL;
6837 	}
6838 	r = radeon_atombios_init(rdev);
6839 	if (r)
6840 		return r;
6841 
6842 	/* Post card if necessary */
6843 	if (!radeon_card_posted(rdev)) {
6844 		if (!rdev->bios) {
6845 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6846 			return -EINVAL;
6847 		}
6848 		DRM_INFO("GPU not posted. posting now...\n");
6849 		atom_asic_init(rdev->mode_info.atom_context);
6850 	}
6851 	/* init golden registers */
6852 	si_init_golden_registers(rdev);
6853 	/* Initialize scratch registers */
6854 	si_scratch_init(rdev);
6855 	/* Initialize surface registers */
6856 	radeon_surface_init(rdev);
6857 	/* Initialize clocks */
6858 	radeon_get_clock_info(rdev->ddev);
6859 
6860 	/* Fence driver */
6861 	r = radeon_fence_driver_init(rdev);
6862 	if (r)
6863 		return r;
6864 
6865 	/* initialize memory controller */
6866 	r = si_mc_init(rdev);
6867 	if (r)
6868 		return r;
6869 	/* Memory manager */
6870 	r = radeon_bo_init(rdev);
6871 	if (r)
6872 		return r;
6873 
6874 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6875 	    !rdev->rlc_fw || !rdev->mc_fw) {
6876 		r = si_init_microcode(rdev);
6877 		if (r) {
6878 			DRM_ERROR("Failed to load firmware!\n");
6879 			return r;
6880 		}
6881 	}
6882 
6883 	/* Initialize power management */
6884 	radeon_pm_init(rdev);
6885 
6886 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6887 	ring->ring_obj = NULL;
6888 	r600_ring_init(rdev, ring, 1024 * 1024);
6889 
6890 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6891 	ring->ring_obj = NULL;
6892 	r600_ring_init(rdev, ring, 1024 * 1024);
6893 
6894 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6895 	ring->ring_obj = NULL;
6896 	r600_ring_init(rdev, ring, 1024 * 1024);
6897 
6898 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6899 	ring->ring_obj = NULL;
6900 	r600_ring_init(rdev, ring, 64 * 1024);
6901 
6902 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6903 	ring->ring_obj = NULL;
6904 	r600_ring_init(rdev, ring, 64 * 1024);
6905 
6906 	si_uvd_init(rdev);
6907 	si_vce_init(rdev);
6908 
6909 	rdev->ih.ring_obj = NULL;
6910 	r600_ih_ring_init(rdev, 64 * 1024);
6911 
6912 	r = r600_pcie_gart_init(rdev);
6913 	if (r)
6914 		return r;
6915 
6916 	rdev->accel_working = true;
6917 	r = si_startup(rdev);
6918 	if (r) {
6919 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6920 		si_cp_fini(rdev);
6921 		cayman_dma_fini(rdev);
6922 		si_irq_fini(rdev);
6923 		sumo_rlc_fini(rdev);
6924 		radeon_wb_fini(rdev);
6925 		radeon_ib_pool_fini(rdev);
6926 		radeon_vm_manager_fini(rdev);
6927 		radeon_irq_kms_fini(rdev);
6928 		si_pcie_gart_fini(rdev);
6929 		rdev->accel_working = false;
6930 	}
6931 
6932 	/* Don't start up if the MC ucode is missing.
6933 	 * The default clocks and voltages before the MC ucode
6934 	 * is loaded are not suffient for advanced operations.
6935 	 */
6936 	if (!rdev->mc_fw) {
6937 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
6938 		return -EINVAL;
6939 	}
6940 
6941 	return 0;
6942 }
6943 
6944 void si_fini(struct radeon_device *rdev)
6945 {
6946 	radeon_pm_fini(rdev);
6947 	si_cp_fini(rdev);
6948 	cayman_dma_fini(rdev);
6949 	si_fini_pg(rdev);
6950 	si_fini_cg(rdev);
6951 	si_irq_fini(rdev);
6952 	sumo_rlc_fini(rdev);
6953 	radeon_wb_fini(rdev);
6954 	radeon_vm_manager_fini(rdev);
6955 	radeon_ib_pool_fini(rdev);
6956 	radeon_irq_kms_fini(rdev);
6957 	if (rdev->has_uvd) {
6958 		uvd_v1_0_fini(rdev);
6959 		radeon_uvd_fini(rdev);
6960 	}
6961 	if (rdev->has_vce)
6962 		radeon_vce_fini(rdev);
6963 	si_pcie_gart_fini(rdev);
6964 	r600_vram_scratch_fini(rdev);
6965 	radeon_gem_fini(rdev);
6966 	radeon_fence_driver_fini(rdev);
6967 	radeon_bo_fini(rdev);
6968 	radeon_atombios_fini(rdev);
6969 	kfree(rdev->bios);
6970 	rdev->bios = NULL;
6971 }
6972 
6973 /**
6974  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6975  *
6976  * @rdev: radeon_device pointer
6977  *
6978  * Fetches a GPU clock counter snapshot (SI).
6979  * Returns the 64 bit clock counter snapshot.
6980  */
6981 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6982 {
6983 	uint64_t clock;
6984 
6985 	mutex_lock(&rdev->gpu_clock_mutex);
6986 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6987 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6988 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6989 	mutex_unlock(&rdev->gpu_clock_mutex);
6990 	return clock;
6991 }
6992 
6993 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6994 {
6995 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
6996 	int r;
6997 
6998 	/* bypass vclk and dclk with bclk */
6999 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7000 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7001 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7002 
7003 	/* put PLL in bypass mode */
7004 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7005 
7006 	if (!vclk || !dclk) {
7007 		/* keep the Bypass mode */
7008 		return 0;
7009 	}
7010 
7011 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7012 					  16384, 0x03FFFFFF, 0, 128, 5,
7013 					  &fb_div, &vclk_div, &dclk_div);
7014 	if (r)
7015 		return r;
7016 
7017 	/* set RESET_ANTI_MUX to 0 */
7018 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7019 
7020 	/* set VCO_MODE to 1 */
7021 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7022 
7023 	/* disable sleep mode */
7024 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7025 
7026 	/* deassert UPLL_RESET */
7027 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7028 
7029 	mdelay(1);
7030 
7031 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7032 	if (r)
7033 		return r;
7034 
7035 	/* assert UPLL_RESET again */
7036 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7037 
7038 	/* disable spread spectrum. */
7039 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7040 
7041 	/* set feedback divider */
7042 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7043 
7044 	/* set ref divider to 0 */
7045 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7046 
7047 	if (fb_div < 307200)
7048 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7049 	else
7050 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7051 
7052 	/* set PDIV_A and PDIV_B */
7053 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7054 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7055 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7056 
7057 	/* give the PLL some time to settle */
7058 	mdelay(15);
7059 
7060 	/* deassert PLL_RESET */
7061 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7062 
7063 	mdelay(15);
7064 
7065 	/* switch from bypass mode to normal mode */
7066 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7067 
7068 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7069 	if (r)
7070 		return r;
7071 
7072 	/* switch VCLK and DCLK selection */
7073 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7074 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7075 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7076 
7077 	mdelay(100);
7078 
7079 	return 0;
7080 }
7081 
7082 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7083 {
7084 	struct pci_dev *root = rdev->pdev->bus->self;
7085 	enum pci_bus_speed speed_cap;
7086 	int bridge_pos, gpu_pos;
7087 	u32 speed_cntl, current_data_rate;
7088 	int i;
7089 	u16 tmp16;
7090 
7091 	if (pci_is_root_bus(rdev->pdev->bus))
7092 		return;
7093 
7094 	if (radeon_pcie_gen2 == 0)
7095 		return;
7096 
7097 	if (rdev->flags & RADEON_IS_IGP)
7098 		return;
7099 
7100 	if (!(rdev->flags & RADEON_IS_PCIE))
7101 		return;
7102 
7103 	speed_cap = pcie_get_speed_cap(root);
7104 	if (speed_cap == PCI_SPEED_UNKNOWN)
7105 		return;
7106 
7107 	if ((speed_cap != PCIE_SPEED_8_0GT) &&
7108 	    (speed_cap != PCIE_SPEED_5_0GT))
7109 		return;
7110 
7111 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7112 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7113 		LC_CURRENT_DATA_RATE_SHIFT;
7114 	if (speed_cap == PCIE_SPEED_8_0GT) {
7115 		if (current_data_rate == 2) {
7116 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7117 			return;
7118 		}
7119 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7120 	} else if (speed_cap == PCIE_SPEED_5_0GT) {
7121 		if (current_data_rate == 1) {
7122 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7123 			return;
7124 		}
7125 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7126 	}
7127 
7128 	bridge_pos = pci_pcie_cap(root);
7129 	if (!bridge_pos)
7130 		return;
7131 
7132 	gpu_pos = pci_pcie_cap(rdev->pdev);
7133 	if (!gpu_pos)
7134 		return;
7135 
7136 	if (speed_cap == PCIE_SPEED_8_0GT) {
7137 		/* re-try equalization if gen3 is not already enabled */
7138 		if (current_data_rate != 2) {
7139 			u16 bridge_cfg, gpu_cfg;
7140 			u16 bridge_cfg2, gpu_cfg2;
7141 			u32 max_lw, current_lw, tmp;
7142 
7143 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7144 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7145 
7146 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7147 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7148 
7149 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7150 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7151 
7152 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7153 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7154 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7155 
7156 			if (current_lw < max_lw) {
7157 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7158 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7159 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7160 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7161 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7162 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7163 				}
7164 			}
7165 
7166 			for (i = 0; i < 10; i++) {
7167 				/* check status */
7168 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7169 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7170 					break;
7171 
7172 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7173 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7174 
7175 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7176 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7177 
7178 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7179 				tmp |= LC_SET_QUIESCE;
7180 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7181 
7182 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7183 				tmp |= LC_REDO_EQ;
7184 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7185 
7186 				msleep(100);
7187 
7188 				/* linkctl */
7189 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7190 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7191 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7192 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7193 
7194 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7195 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7196 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7197 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7198 
7199 				/* linkctl2 */
7200 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7201 				tmp16 &= ~((1 << 4) | (7 << 9));
7202 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7203 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7204 
7205 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7206 				tmp16 &= ~((1 << 4) | (7 << 9));
7207 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7208 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7209 
7210 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7211 				tmp &= ~LC_SET_QUIESCE;
7212 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7213 			}
7214 		}
7215 	}
7216 
7217 	/* set the link speed */
7218 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7219 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7220 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7221 
7222 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7223 	tmp16 &= ~0xf;
7224 	if (speed_cap == PCIE_SPEED_8_0GT)
7225 		tmp16 |= 3; /* gen3 */
7226 	else if (speed_cap == PCIE_SPEED_5_0GT)
7227 		tmp16 |= 2; /* gen2 */
7228 	else
7229 		tmp16 |= 1; /* gen1 */
7230 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7231 
7232 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7233 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7234 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7235 
7236 	for (i = 0; i < rdev->usec_timeout; i++) {
7237 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7238 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7239 			break;
7240 		udelay(1);
7241 	}
7242 }
7243 
7244 static void si_program_aspm(struct radeon_device *rdev)
7245 {
7246 	u32 data, orig;
7247 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7248 	bool disable_clkreq = false;
7249 
7250 	if (radeon_aspm == 0)
7251 		return;
7252 
7253 	if (!(rdev->flags & RADEON_IS_PCIE))
7254 		return;
7255 
7256 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7257 	data &= ~LC_XMIT_N_FTS_MASK;
7258 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7259 	if (orig != data)
7260 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7261 
7262 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7263 	data |= LC_GO_TO_RECOVERY;
7264 	if (orig != data)
7265 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7266 
7267 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7268 	data |= P_IGNORE_EDB_ERR;
7269 	if (orig != data)
7270 		WREG32_PCIE(PCIE_P_CNTL, data);
7271 
7272 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7273 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7274 	data |= LC_PMI_TO_L1_DIS;
7275 	if (!disable_l0s)
7276 		data |= LC_L0S_INACTIVITY(7);
7277 
7278 	if (!disable_l1) {
7279 		data |= LC_L1_INACTIVITY(7);
7280 		data &= ~LC_PMI_TO_L1_DIS;
7281 		if (orig != data)
7282 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7283 
7284 		if (!disable_plloff_in_l1) {
7285 			bool clk_req_support;
7286 
7287 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7288 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7289 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7290 			if (orig != data)
7291 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7292 
7293 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7294 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7295 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7296 			if (orig != data)
7297 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7298 
7299 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7300 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7301 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7302 			if (orig != data)
7303 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7304 
7305 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7306 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7307 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7308 			if (orig != data)
7309 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7310 
7311 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7312 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7313 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7314 				if (orig != data)
7315 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7316 
7317 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7318 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7319 				if (orig != data)
7320 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7321 
7322 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7323 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7324 				if (orig != data)
7325 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7326 
7327 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7328 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7329 				if (orig != data)
7330 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7331 
7332 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7333 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7334 				if (orig != data)
7335 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7336 
7337 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7338 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7339 				if (orig != data)
7340 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7341 
7342 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7343 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7344 				if (orig != data)
7345 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7346 
7347 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7348 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7349 				if (orig != data)
7350 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7351 			}
7352 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7353 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7354 			data |= LC_DYN_LANES_PWR_STATE(3);
7355 			if (orig != data)
7356 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7357 
7358 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7359 			data &= ~LS2_EXIT_TIME_MASK;
7360 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7361 				data |= LS2_EXIT_TIME(5);
7362 			if (orig != data)
7363 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7364 
7365 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7366 			data &= ~LS2_EXIT_TIME_MASK;
7367 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7368 				data |= LS2_EXIT_TIME(5);
7369 			if (orig != data)
7370 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7371 
7372 			if (!disable_clkreq &&
7373 			    !pci_is_root_bus(rdev->pdev->bus)) {
7374 				struct pci_dev *root = rdev->pdev->bus->self;
7375 				u32 lnkcap;
7376 
7377 				clk_req_support = false;
7378 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7379 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7380 					clk_req_support = true;
7381 			} else {
7382 				clk_req_support = false;
7383 			}
7384 
7385 			if (clk_req_support) {
7386 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7387 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7388 				if (orig != data)
7389 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7390 
7391 				orig = data = RREG32(THM_CLK_CNTL);
7392 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7393 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7394 				if (orig != data)
7395 					WREG32(THM_CLK_CNTL, data);
7396 
7397 				orig = data = RREG32(MISC_CLK_CNTL);
7398 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7399 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7400 				if (orig != data)
7401 					WREG32(MISC_CLK_CNTL, data);
7402 
7403 				orig = data = RREG32(CG_CLKPIN_CNTL);
7404 				data &= ~BCLK_AS_XCLK;
7405 				if (orig != data)
7406 					WREG32(CG_CLKPIN_CNTL, data);
7407 
7408 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7409 				data &= ~FORCE_BIF_REFCLK_EN;
7410 				if (orig != data)
7411 					WREG32(CG_CLKPIN_CNTL_2, data);
7412 
7413 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7414 				data &= ~MPLL_CLKOUT_SEL_MASK;
7415 				data |= MPLL_CLKOUT_SEL(4);
7416 				if (orig != data)
7417 					WREG32(MPLL_BYPASSCLK_SEL, data);
7418 
7419 				orig = data = RREG32(SPLL_CNTL_MODE);
7420 				data &= ~SPLL_REFCLK_SEL_MASK;
7421 				if (orig != data)
7422 					WREG32(SPLL_CNTL_MODE, data);
7423 			}
7424 		}
7425 	} else {
7426 		if (orig != data)
7427 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7428 	}
7429 
7430 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7431 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7432 	if (orig != data)
7433 		WREG32_PCIE(PCIE_CNTL2, data);
7434 
7435 	if (!disable_l0s) {
7436 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7437 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7438 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7439 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7440 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7441 				data &= ~LC_L0S_INACTIVITY_MASK;
7442 				if (orig != data)
7443 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7444 			}
7445 		}
7446 	}
7447 }
7448 
7449 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7450 {
7451 	unsigned i;
7452 
7453 	/* make sure VCEPLL_CTLREQ is deasserted */
7454 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7455 
7456 	mdelay(10);
7457 
7458 	/* assert UPLL_CTLREQ */
7459 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7460 
7461 	/* wait for CTLACK and CTLACK2 to get asserted */
7462 	for (i = 0; i < 100; ++i) {
7463 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7464 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7465 			break;
7466 		mdelay(10);
7467 	}
7468 
7469 	/* deassert UPLL_CTLREQ */
7470 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7471 
7472 	if (i == 100) {
7473 		DRM_ERROR("Timeout setting UVD clocks!\n");
7474 		return -ETIMEDOUT;
7475 	}
7476 
7477 	return 0;
7478 }
7479 
7480 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7481 {
7482 	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7483 	int r;
7484 
7485 	/* bypass evclk and ecclk with bclk */
7486 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7487 		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7488 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7489 
7490 	/* put PLL in bypass mode */
7491 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7492 		     ~VCEPLL_BYPASS_EN_MASK);
7493 
7494 	if (!evclk || !ecclk) {
7495 		/* keep the Bypass mode, put PLL to sleep */
7496 		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7497 			     ~VCEPLL_SLEEP_MASK);
7498 		return 0;
7499 	}
7500 
7501 	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7502 					  16384, 0x03FFFFFF, 0, 128, 5,
7503 					  &fb_div, &evclk_div, &ecclk_div);
7504 	if (r)
7505 		return r;
7506 
7507 	/* set RESET_ANTI_MUX to 0 */
7508 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7509 
7510 	/* set VCO_MODE to 1 */
7511 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7512 		     ~VCEPLL_VCO_MODE_MASK);
7513 
7514 	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7515 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7516 		     ~VCEPLL_SLEEP_MASK);
7517 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7518 
7519 	/* deassert VCEPLL_RESET */
7520 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7521 
7522 	mdelay(1);
7523 
7524 	r = si_vce_send_vcepll_ctlreq(rdev);
7525 	if (r)
7526 		return r;
7527 
7528 	/* assert VCEPLL_RESET again */
7529 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7530 
7531 	/* disable spread spectrum. */
7532 	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7533 
7534 	/* set feedback divider */
7535 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7536 
7537 	/* set ref divider to 0 */
7538 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7539 
7540 	/* set PDIV_A and PDIV_B */
7541 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7542 		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7543 		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7544 
7545 	/* give the PLL some time to settle */
7546 	mdelay(15);
7547 
7548 	/* deassert PLL_RESET */
7549 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7550 
7551 	mdelay(15);
7552 
7553 	/* switch from bypass mode to normal mode */
7554 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7555 
7556 	r = si_vce_send_vcepll_ctlreq(rdev);
7557 	if (r)
7558 		return r;
7559 
7560 	/* switch VCLK and DCLK selection */
7561 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7562 		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7563 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7564 
7565 	mdelay(100);
7566 
7567 	return 0;
7568 }
7569