xref: /dragonfly/sys/dev/drm/radeon/ni.c (revision 9ebbd47d)
1 /*
2  * Copyright 2010 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include <uapi_drm/radeon_drm.h>
30 #include "nid.h"
31 #include "atom.h"
32 #include "ni_reg.h"
33 #include "cayman_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_cayman.h"
36 
/*
 * MMIO register offsets the RLC saves and restores around power-gating
 * transitions on TN (Trinity/ARUBA) parts.  One offset per entry; the
 * order is consumed as-is by the RLC save/restore machinery.
 */
static const u32 tn_rlc_save_restore_register_list[] =
{
	0x98fc,
	0x98f0,
	0x9834,
	0x9838,
	0x9870,
	0x9874,
	0x8a14,
	0x8b24,
	0x8bcc,
	0x8b10,
	0x8c30,
	0x8d00,
	0x8d04,
	0x8c00,
	0x8c04,
	0x8c10,
	0x8c14,
	0x8d8c,
	0x8cf0,
	0x8e38,
	0x9508,
	0x9688,
	0x9608,
	0x960c,
	0x9610,
	0x9614,
	0x88c4,
	0x8978,
	0x88d4,
	0x900c,
	0x9100,
	0x913c,
	0x90e8,
	0x9354,
	0xa008,
	0x98f8,
	0x9148,
	0x914c,
	0x3f94,
	0x98f4,
	0x9b7c,
	0x3f8c,
	0x8950,
	0x8954,
	0x8a18,
	0x8b28,
	0x9144,
	0x3f90,
	0x915c,
	0x9160,
	0x9178,
	0x917c,
	0x9180,
	0x918c,
	0x9190,
	0x9194,
	0x9198,
	0x919c,
	0x91a8,
	0x91ac,
	0x91b0,
	0x91b4,
	0x91b8,
	0x91c4,
	0x91c8,
	0x91cc,
	0x91d0,
	0x91d4,
	0x91e0,
	0x91e4,
	0x91ec,
	0x91f0,
	0x91f4,
	0x9200,
	0x9204,
	0x929c,
	0x8030,
	0x9150,
	0x9a60,
	0x920c,
	0x9210,
	0x9228,
	0x922c,
	0x9244,
	0x9248,
	0x91e8,
	0x9294,
	0x9208,
	0x9224,
	0x9240,
	0x9220,
	0x923c,
	0x9258,
	0x9744,
	0xa200,
	0xa204,
	0xa208,
	0xa20c,
	0x8d58,
	0x9030,
	0x9034,
	0x9038,
	0x903c,
	0x9040,
	0x9654,
	0x897c,
	0xa210,
	0xa214,
	0x9868,
	0xa02c,
	0x9664,
	0x9698,
	0x949c,
	0x8e10,
	0x8e18,
	0x8c50,
	0x8c58,
	0x8c60,
	0x8c68,
	0x89b4,
	0x9830,
	0x802c,
};
162 
/*
 * Firmware Names
 *
 * BARTS, TURKS and CAICOS share one common "BTC" RLC image; CAYMAN and
 * ARUBA carry their own RLC.  ARUBA (integrated) has no MC or SMC image
 * (see ni_init_microcode(), which sets mc_req_size = 0 for it).
 */
MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
MODULE_FIRMWARE("radeon/BARTS_me.bin");
MODULE_FIRMWARE("radeon/BARTS_mc.bin");
MODULE_FIRMWARE("radeon/BARTS_smc.bin");
MODULE_FIRMWARE("radeon/BTC_rlc.bin");
MODULE_FIRMWARE("radeon/TURKS_pfp.bin");
MODULE_FIRMWARE("radeon/TURKS_me.bin");
MODULE_FIRMWARE("radeon/TURKS_mc.bin");
MODULE_FIRMWARE("radeon/TURKS_smc.bin");
MODULE_FIRMWARE("radeon/CAICOS_pfp.bin");
MODULE_FIRMWARE("radeon/CAICOS_me.bin");
MODULE_FIRMWARE("radeon/CAICOS_mc.bin");
MODULE_FIRMWARE("radeon/CAICOS_smc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_pfp.bin");
MODULE_FIRMWARE("radeon/CAYMAN_me.bin");
MODULE_FIRMWARE("radeon/CAYMAN_mc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_rlc.bin");
MODULE_FIRMWARE("radeon/CAYMAN_smc.bin");
MODULE_FIRMWARE("radeon/ARUBA_pfp.bin");
MODULE_FIRMWARE("radeon/ARUBA_me.bin");
MODULE_FIRMWARE("radeon/ARUBA_rlc.bin");
185 
186 
/*
 * Extra CAYMAN golden settings, applied after cayman_golden_registers
 * in ni_init_golden_registers().  Each row is {offset, and-mask,
 * or-value}, presumably applied read-modify-write by
 * radeon_program_register_sequence() — format shared by all the
 * *_golden_registers tables below.
 */
static const u32 cayman_golden_registers2[] =
{
	0x3e5c, 0xffffffff, 0x00000000,
	0x3e48, 0xffffffff, 0x00000000,
	0x3e4c, 0xffffffff, 0x00000000,
	0x3e64, 0xffffffff, 0x00000000,
	0x3e50, 0xffffffff, 0x00000000,
	0x3e60, 0xffffffff, 0x00000000
};
196 
/*
 * Golden register settings for CHIP_CAYMAN, programmed by
 * ni_init_golden_registers().  {offset, and-mask, or-value} triplets.
 */
static const u32 cayman_golden_registers[] =
{
	0x5eb4, 0xffffffff, 0x00000002,
	0x5e78, 0x8f311ff1, 0x001000f0,
	0x3f90, 0xffff0000, 0xff000000,
	0x9148, 0xffff0000, 0xff000000,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0xc78, 0x00000080, 0x00000080,
	0xbd4, 0x70073777, 0x00011003,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x02011003,
	0x5bc0, 0x00200000, 0x50100000,
	0x98f8, 0x33773777, 0x02011003,
	0x98fc, 0xffffffff, 0x76541032,
	0x7030, 0x31000311, 0x00000011,
	0x2f48, 0x33773777, 0x42010001,
	0x6b28, 0x00000010, 0x00000012,
	0x7728, 0x00000010, 0x00000012,
	0x10328, 0x00000010, 0x00000012,
	0x10f28, 0x00000010, 0x00000012,
	0x11b28, 0x00000010, 0x00000012,
	0x12728, 0x00000010, 0x00000012,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x10c, 0x00000001, 0x00010003,
	0xa02c, 0xffffffff, 0x0000009b,
	0x913c, 0x0000010f, 0x01000100,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0x3700001f, 0x00000002,
	0x960c, 0xffffffff, 0x54763210,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d0, 0xffffffff, 0x0f40df40,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000
};
237 
/*
 * Second-stage golden settings shared by both ARUBA variants (applied
 * after either dvst_golden_registers or scrapper_golden_registers).
 * {offset, and-mask, or-value} triplets.
 */
static const u32 dvst_golden_registers2[] =
{
	0x8f8, 0xffffffff, 0,
	0x8fc, 0x00380000, 0,
	0x8f8, 0xffffffff, 1,
	0x8fc, 0x0e000000, 0
};
245 
/*
 * Golden register settings for the "dvst" subset of CHIP_ARUBA devices
 * (selected by PCI device id in ni_init_golden_registers()).
 * {offset, and-mask, or-value} triplets.
 */
static const u32 dvst_golden_registers[] =
{
	0x690, 0x3fff3fff, 0x20c00033,
	0x918c, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x9150, 0xffffdfff, 0x6e944040,
	0x917c, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x915c, 0x0fff0fff, 0x00010000,
	0x3f90, 0xffff0001, 0xff000000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9148, 0xffff0001, 0xff000000,
	0x9190, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x929c, 0x00000fff, 0x00000001,
	0x55e4, 0xff607fff, 0xfc000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffe, 0x00000000,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x12010001,
	0x5bb0, 0x000000f0, 0x00000070,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x00030000, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xa008, 0xffffffff, 0x00010000,
	0x913c, 0xffff03ff, 0x01000100,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0xf700071f, 0x00000002,
	0x960c, 0xffffffff, 0x54763210,
	0x20ef8, 0x01ff01ff, 0x00000002,
	0x20e98, 0xfffffbff, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x8978, 0x3fffffff, 0x04050140,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000
};
303 
/*
 * Golden register settings for the remaining CHIP_ARUBA devices (the
 * "scrapper" variant; everything not matched by the dvst id list in
 * ni_init_golden_registers()).  {offset, and-mask, or-value} triplets.
 * NOTE(review): most rows are intentionally duplicated — presumably
 * once per shader engine or instance; kept verbatim from the original.
 */
static const u32 scrapper_golden_registers[] =
{
	0x690, 0x3fff3fff, 0x20c00033,
	0x918c, 0x0fff0fff, 0x00010006,
	0x918c, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x91a8, 0x0fff0fff, 0x00010006,
	0x9150, 0xffffdfff, 0x6e944040,
	0x9150, 0xffffdfff, 0x6e944040,
	0x917c, 0x0fff0fff, 0x00030002,
	0x917c, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x9198, 0x0fff0fff, 0x00030002,
	0x915c, 0x0fff0fff, 0x00010000,
	0x915c, 0x0fff0fff, 0x00010000,
	0x3f90, 0xffff0001, 0xff000000,
	0x3f90, 0xffff0001, 0xff000000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9178, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9194, 0x0fff0fff, 0x00070000,
	0x9148, 0xffff0001, 0xff000000,
	0x9148, 0xffff0001, 0xff000000,
	0x9190, 0x0fff0fff, 0x00090008,
	0x9190, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x91ac, 0x0fff0fff, 0x00090008,
	0x3f94, 0xffff0000, 0xff000000,
	0x3f94, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x914c, 0xffff0000, 0xff000000,
	0x929c, 0x00000fff, 0x00000001,
	0x929c, 0x00000fff, 0x00000001,
	0x55e4, 0xff607fff, 0xfc000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8a18, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x8b28, 0xff000fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x9144, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffe, 0x00000000,
	0x9838, 0xfffffffe, 0x00000000,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd02c, 0xbfffff1f, 0x08421000,
	0xd0b8, 0x73773777, 0x12010001,
	0xd0b8, 0x73773777, 0x12010001,
	0x5bb0, 0x000000f0, 0x00000070,
	0x98f8, 0x73773777, 0x12010001,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x00030000, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b24, 0x3fff3fff, 0x00ff0fff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x4d8, 0x00000fff, 0x00000100,
	0xa008, 0xffffffff, 0x00010000,
	0xa008, 0xffffffff, 0x00010000,
	0x913c, 0xffff03ff, 0x01000100,
	0x913c, 0xffff03ff, 0x01000100,
	0x90e8, 0x001fffff, 0x010400c0,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c00, 0x000000ff, 0x00000003,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8c04, 0xf8ff00ff, 0x40600060,
	0x8c30, 0x0000000f, 0x00040005,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x8cf0, 0x1fff1fff, 0x08e00410,
	0x900c, 0x00ffffff, 0x0017071f,
	0x28350, 0x00000f01, 0x00000000,
	0x28350, 0x00000f01, 0x00000000,
	0x9508, 0xf700071f, 0x00000002,
	0x9508, 0xf700071f, 0x00000002,
	0x9688, 0x00300000, 0x0017000f,
	0x960c, 0xffffffff, 0x54763210,
	0x960c, 0xffffffff, 0x54763210,
	0x20ef8, 0x01ff01ff, 0x00000002,
	0x20e98, 0xfffffbff, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x8978, 0x3fffffff, 0x04050140,
	0x8978, 0x3fffffff, 0x04050140,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x8974, 0xffffffff, 0x00000000,
	0x8974, 0xffffffff, 0x00000000
};
410 
411 static void ni_init_golden_registers(struct radeon_device *rdev)
412 {
413 	switch (rdev->family) {
414 	case CHIP_CAYMAN:
415 		radeon_program_register_sequence(rdev,
416 						 cayman_golden_registers,
417 						 (const u32)ARRAY_SIZE(cayman_golden_registers));
418 		radeon_program_register_sequence(rdev,
419 						 cayman_golden_registers2,
420 						 (const u32)ARRAY_SIZE(cayman_golden_registers2));
421 		break;
422 	case CHIP_ARUBA:
423 		if ((rdev->ddev->pci_device == 0x9900) ||
424 		    (rdev->ddev->pci_device == 0x9901) ||
425 		    (rdev->ddev->pci_device == 0x9903) ||
426 		    (rdev->ddev->pci_device == 0x9904) ||
427 		    (rdev->ddev->pci_device == 0x9905) ||
428 		    (rdev->ddev->pci_device == 0x9906) ||
429 		    (rdev->ddev->pci_device == 0x9907) ||
430 		    (rdev->ddev->pci_device == 0x9908) ||
431 		    (rdev->ddev->pci_device == 0x9909) ||
432 		    (rdev->ddev->pci_device == 0x990A) ||
433 		    (rdev->ddev->pci_device == 0x990B) ||
434 		    (rdev->ddev->pci_device == 0x990C) ||
435 		    (rdev->ddev->pci_device == 0x990D) ||
436 		    (rdev->ddev->pci_device == 0x990E) ||
437 		    (rdev->ddev->pci_device == 0x990F) ||
438 		    (rdev->ddev->pci_device == 0x9910) ||
439 		    (rdev->ddev->pci_device == 0x9913) ||
440 		    (rdev->ddev->pci_device == 0x9917) ||
441 		    (rdev->ddev->pci_device == 0x9918)) {
442 			radeon_program_register_sequence(rdev,
443 							 dvst_golden_registers,
444 							 (const u32)ARRAY_SIZE(dvst_golden_registers));
445 			radeon_program_register_sequence(rdev,
446 							 dvst_golden_registers2,
447 							 (const u32)ARRAY_SIZE(dvst_golden_registers2));
448 		} else {
449 			radeon_program_register_sequence(rdev,
450 							 scrapper_golden_registers,
451 							 (const u32)ARRAY_SIZE(scrapper_golden_registers));
452 			radeon_program_register_sequence(rdev,
453 							 dvst_golden_registers2,
454 							 (const u32)ARRAY_SIZE(dvst_golden_registers2));
455 		}
456 		break;
457 	default:
458 		break;
459 	}
460 }
461 
462 #define BTC_IO_MC_REGS_SIZE 29
463 
/*
 * BARTS MC io-debug programming: {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} pairs written by ni_mc_load_microcode()
 * before uploading the MC ucode.
 */
static const u32 barts_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00946a00}
};
495 
/*
 * TURKS MC io-debug programming: {index, data} pairs; identical to the
 * BARTS table except for the final (0x9f) entry.
 */
static const u32 turks_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00936a00}
};
527 
/*
 * CAICOS MC io-debug programming: {index, data} pairs; identical to the
 * BARTS table except for the final (0x9f) entry.
 */
static const u32 caicos_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00916a00}
};
559 
/*
 * CAYMAN MC io-debug programming: {index, data} pairs; identical to the
 * BARTS table except for the final (0x9f) entry.
 */
static const u32 cayman_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
	{0x00000077, 0xff010100},
	{0x00000078, 0x00000000},
	{0x00000079, 0x00001434},
	{0x0000007a, 0xcc08ec08},
	{0x0000007b, 0x00040000},
	{0x0000007c, 0x000080c0},
	{0x0000007d, 0x09000000},
	{0x0000007e, 0x00210404},
	{0x00000081, 0x08a8e800},
	{0x00000082, 0x00030444},
	{0x00000083, 0x00000000},
	{0x00000085, 0x00000001},
	{0x00000086, 0x00000002},
	{0x00000087, 0x48490000},
	{0x00000088, 0x20244647},
	{0x00000089, 0x00000005},
	{0x0000008b, 0x66030000},
	{0x0000008c, 0x00006603},
	{0x0000008d, 0x00000100},
	{0x0000008f, 0x00001c0a},
	{0x00000090, 0xff000001},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00976b00}
};
591 
592 int ni_mc_load_microcode(struct radeon_device *rdev)
593 {
594 	const __be32 *fw_data;
595 	u32 mem_type, running, blackout = 0;
596 	u32 *io_mc_regs;
597 	int i, ucode_size, regs_size;
598 
599 	if (!rdev->mc_fw)
600 		return -EINVAL;
601 
602 	switch (rdev->family) {
603 	case CHIP_BARTS:
604 		io_mc_regs = (u32 *)&barts_io_mc_regs;
605 		ucode_size = BTC_MC_UCODE_SIZE;
606 		regs_size = BTC_IO_MC_REGS_SIZE;
607 		break;
608 	case CHIP_TURKS:
609 		io_mc_regs = (u32 *)&turks_io_mc_regs;
610 		ucode_size = BTC_MC_UCODE_SIZE;
611 		regs_size = BTC_IO_MC_REGS_SIZE;
612 		break;
613 	case CHIP_CAICOS:
614 	default:
615 		io_mc_regs = (u32 *)&caicos_io_mc_regs;
616 		ucode_size = BTC_MC_UCODE_SIZE;
617 		regs_size = BTC_IO_MC_REGS_SIZE;
618 		break;
619 	case CHIP_CAYMAN:
620 		io_mc_regs = (u32 *)&cayman_io_mc_regs;
621 		ucode_size = CAYMAN_MC_UCODE_SIZE;
622 		regs_size = BTC_IO_MC_REGS_SIZE;
623 		break;
624 	}
625 
626 	mem_type = (RREG32(MC_SEQ_MISC0) & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT;
627 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
628 
629 	if ((mem_type == MC_SEQ_MISC0_GDDR5_VALUE) && (running == 0)) {
630 		if (running) {
631 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
632 			WREG32(MC_SHARED_BLACKOUT_CNTL, 1);
633 		}
634 
635 		/* reset the engine and set to writable */
636 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
637 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
638 
639 		/* load mc io regs */
640 		for (i = 0; i < regs_size; i++) {
641 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
642 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
643 		}
644 		/* load the MC ucode */
645 		fw_data = (const __be32 *)rdev->mc_fw->data;
646 		for (i = 0; i < ucode_size; i++)
647 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
648 
649 		/* put the engine back into the active state */
650 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
651 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
652 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
653 
654 		/* wait for training to complete */
655 		for (i = 0; i < rdev->usec_timeout; i++) {
656 			if (RREG32(MC_IO_PAD_CNTL_D0) & MEM_FALL_OUT_CMD)
657 				break;
658 			udelay(1);
659 		}
660 
661 		if (running)
662 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
663 	}
664 
665 	return 0;
666 }
667 
668 int ni_init_microcode(struct radeon_device *rdev)
669 {
670 	const char *chip_name;
671 	const char *rlc_chip_name;
672 	size_t pfp_req_size, me_req_size, rlc_req_size, mc_req_size;
673 	size_t smc_req_size = 0;
674 	char fw_name[30];
675 	int err;
676 
677 	DRM_DEBUG("\n");
678 
679 	switch (rdev->family) {
680 	case CHIP_BARTS:
681 		chip_name = "BARTS";
682 		rlc_chip_name = "BTC";
683 		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
684 		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
685 		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
686 		mc_req_size = BTC_MC_UCODE_SIZE * 4;
687 		smc_req_size = ALIGN(BARTS_SMC_UCODE_SIZE, 4);
688 		break;
689 	case CHIP_TURKS:
690 		chip_name = "TURKS";
691 		rlc_chip_name = "BTC";
692 		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
693 		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
694 		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
695 		mc_req_size = BTC_MC_UCODE_SIZE * 4;
696 		smc_req_size = ALIGN(TURKS_SMC_UCODE_SIZE, 4);
697 		break;
698 	case CHIP_CAICOS:
699 		chip_name = "CAICOS";
700 		rlc_chip_name = "BTC";
701 		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
702 		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
703 		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
704 		mc_req_size = BTC_MC_UCODE_SIZE * 4;
705 		smc_req_size = ALIGN(CAICOS_SMC_UCODE_SIZE, 4);
706 		break;
707 	case CHIP_CAYMAN:
708 		chip_name = "CAYMAN";
709 		rlc_chip_name = "CAYMAN";
710 		pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
711 		me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
712 		rlc_req_size = CAYMAN_RLC_UCODE_SIZE * 4;
713 		mc_req_size = CAYMAN_MC_UCODE_SIZE * 4;
714 		smc_req_size = ALIGN(CAYMAN_SMC_UCODE_SIZE, 4);
715 		break;
716 	case CHIP_ARUBA:
717 		chip_name = "ARUBA";
718 		rlc_chip_name = "ARUBA";
719 		/* pfp/me same size as CAYMAN */
720 		pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
721 		me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
722 		rlc_req_size = ARUBA_RLC_UCODE_SIZE * 4;
723 		mc_req_size = 0;
724 		break;
725 	default: BUG();
726 	}
727 
728 	DRM_INFO("Loading %s Microcode\n", chip_name);
729 
730 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
731 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
732 	if (err)
733 		goto out;
734 	if (rdev->pfp_fw->datasize != pfp_req_size) {
735 		printk(KERN_ERR
736 		       "ni_pfp: Bogus length %zu in firmware \"%s\"\n",
737 		       rdev->pfp_fw->datasize, fw_name);
738 		err = -EINVAL;
739 		goto out;
740 	}
741 
742 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
743 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
744 	if (err)
745 		goto out;
746 	if (rdev->me_fw->datasize != me_req_size) {
747 		printk(KERN_ERR
748 		       "ni_me: Bogus length %zu in firmware \"%s\"\n",
749 		       rdev->me_fw->datasize, fw_name);
750 		err = -EINVAL;
751 	}
752 
753 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc",
754 		  rlc_chip_name);
755 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
756 	if (err)
757 		goto out;
758 	if (rdev->rlc_fw->datasize != rlc_req_size) {
759 		printk(KERN_ERR
760 		       "ni_rlc: Bogus length %zu in firmware \"%s\"\n",
761 		       rdev->rlc_fw->datasize, fw_name);
762 		err = -EINVAL;
763 	}
764 
765 	/* no MC ucode on TN */
766 	if (!(rdev->flags & RADEON_IS_IGP)) {
767 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
768 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
769 		if (err)
770 			goto out;
771 		if (rdev->mc_fw->datasize != mc_req_size) {
772 			printk(KERN_ERR
773 			       "ni_mc: Bogus length %zu in firmware \"%s\"\n",
774 			       rdev->mc_fw->datasize, fw_name);
775 			err = -EINVAL;
776 		}
777 	}
778 
779 	if ((rdev->family >= CHIP_BARTS) && (rdev->family <= CHIP_CAYMAN)) {
780 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
781 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
782 		if (err) {
783 			printk(KERN_ERR
784 			       "smc: error loading firmware \"%s\"\n",
785 			       fw_name);
786 			release_firmware(rdev->smc_fw);
787 			rdev->smc_fw = NULL;
788 			err = 0;
789 		} else if (rdev->smc_fw->datasize != smc_req_size) {
790 			printk(KERN_ERR
791 			       "ni_smc: Bogus length %zu in firmware \"%s\"\n",
792 			       rdev->smc_fw->datasize, fw_name);
793 			err = -EINVAL;
794 		}
795 	}
796 
797 out:
798 	if (err) {
799 		if (err != -EINVAL)
800 			printk(KERN_ERR
801 			       "ni_cp: Failed to load firmware \"%s\"\n",
802 			       fw_name);
803 		release_firmware(rdev->pfp_fw);
804 		rdev->pfp_fw = NULL;
805 		release_firmware(rdev->me_fw);
806 		rdev->me_fw = NULL;
807 		release_firmware(rdev->rlc_fw);
808 		rdev->rlc_fw = NULL;
809 		release_firmware(rdev->mc_fw);
810 		rdev->mc_fw = NULL;
811 		release_firmware(rdev->smc_fw);
812 		rdev->smc_fw = NULL;
813 	}
814 	return err;
815 }
816 
817 /**
818  * ni_fini_microcode - drop the firmwares image references
819  *
820  * @rdev: radeon_device pointer
821  *
822  * Drop the pfp, me, mc and rlc firmwares image references.
823  * Called at driver shutdown.
824  */
825 void ni_fini_microcode(struct radeon_device *rdev)
826 {
827 	release_firmware(rdev->pfp_fw);
828 	rdev->pfp_fw = NULL;
829 	release_firmware(rdev->me_fw);
830 	rdev->me_fw = NULL;
831 	release_firmware(rdev->rlc_fw);
832 	rdev->rlc_fw = NULL;
833 	release_firmware(rdev->mc_fw);
834 	rdev->mc_fw = NULL;
835 	release_firmware(rdev->smc_fw);
836 	rdev->smc_fw = NULL;
837 }
838 
839 int tn_get_temp(struct radeon_device *rdev)
840 {
841 	u32 temp = RREG32_SMC(TN_CURRENT_GNB_TEMP) & 0x7ff;
842 	int actual_temp = (temp / 8) - 49;
843 
844 	return actual_temp * 1000;
845 }
846 
847 /*
848  * Core functions
849  */
850 static void cayman_gpu_init(struct radeon_device *rdev)
851 {
852 	u32 gb_addr_config = 0;
853 	u32 mc_shared_chmap, mc_arb_ramcfg;
854 	u32 cgts_tcc_disable;
855 	u32 sx_debug_1;
856 	u32 smx_dc_ctl0;
857 	u32 cgts_sm_ctrl_reg;
858 	u32 hdp_host_path_cntl;
859 	u32 tmp;
860 	u32 disabled_rb_mask;
861 	int i, j;
862 
863 	switch (rdev->family) {
864 	case CHIP_CAYMAN:
865 		rdev->config.cayman.max_shader_engines = 2;
866 		rdev->config.cayman.max_pipes_per_simd = 4;
867 		rdev->config.cayman.max_tile_pipes = 8;
868 		rdev->config.cayman.max_simds_per_se = 12;
869 		rdev->config.cayman.max_backends_per_se = 4;
870 		rdev->config.cayman.max_texture_channel_caches = 8;
871 		rdev->config.cayman.max_gprs = 256;
872 		rdev->config.cayman.max_threads = 256;
873 		rdev->config.cayman.max_gs_threads = 32;
874 		rdev->config.cayman.max_stack_entries = 512;
875 		rdev->config.cayman.sx_num_of_sets = 8;
876 		rdev->config.cayman.sx_max_export_size = 256;
877 		rdev->config.cayman.sx_max_export_pos_size = 64;
878 		rdev->config.cayman.sx_max_export_smx_size = 192;
879 		rdev->config.cayman.max_hw_contexts = 8;
880 		rdev->config.cayman.sq_num_cf_insts = 2;
881 
882 		rdev->config.cayman.sc_prim_fifo_size = 0x100;
883 		rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
884 		rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
885 		gb_addr_config = CAYMAN_GB_ADDR_CONFIG_GOLDEN;
886 		break;
887 	case CHIP_ARUBA:
888 	default:
889 		rdev->config.cayman.max_shader_engines = 1;
890 		rdev->config.cayman.max_pipes_per_simd = 4;
891 		rdev->config.cayman.max_tile_pipes = 2;
892 		if ((rdev->ddev->pci_device == 0x9900) ||
893 		    (rdev->ddev->pci_device == 0x9901) ||
894 		    (rdev->ddev->pci_device == 0x9905) ||
895 		    (rdev->ddev->pci_device == 0x9906) ||
896 		    (rdev->ddev->pci_device == 0x9907) ||
897 		    (rdev->ddev->pci_device == 0x9908) ||
898 		    (rdev->ddev->pci_device == 0x9909) ||
899 		    (rdev->ddev->pci_device == 0x990B) ||
900 		    (rdev->ddev->pci_device == 0x990C) ||
901 		    (rdev->ddev->pci_device == 0x990F) ||
902 		    (rdev->ddev->pci_device == 0x9910) ||
903 		    (rdev->ddev->pci_device == 0x9917) ||
904 		    (rdev->ddev->pci_device == 0x9999) ||
905 		    (rdev->ddev->pci_device == 0x999C)) {
906 			rdev->config.cayman.max_simds_per_se = 6;
907 			rdev->config.cayman.max_backends_per_se = 2;
908 		} else if ((rdev->ddev->pci_device == 0x9903) ||
909 			   (rdev->ddev->pci_device == 0x9904) ||
910 			   (rdev->ddev->pci_device == 0x990A) ||
911 			   (rdev->ddev->pci_device == 0x990D) ||
912 			   (rdev->ddev->pci_device == 0x990E) ||
913 			   (rdev->ddev->pci_device == 0x9913) ||
914 			   (rdev->ddev->pci_device == 0x9918) ||
915 			   (rdev->ddev->pci_device == 0x999D)) {
916 			rdev->config.cayman.max_simds_per_se = 4;
917 			rdev->config.cayman.max_backends_per_se = 2;
918 		} else if ((rdev->ddev->pci_device == 0x9919) ||
919 			   (rdev->ddev->pci_device == 0x9990) ||
920 			   (rdev->ddev->pci_device == 0x9991) ||
921 			   (rdev->ddev->pci_device == 0x9994) ||
922 			   (rdev->ddev->pci_device == 0x9995) ||
923 			   (rdev->ddev->pci_device == 0x9996) ||
924 			   (rdev->ddev->pci_device == 0x999A) ||
925 			   (rdev->ddev->pci_device == 0x99A0)) {
926 			rdev->config.cayman.max_simds_per_se = 3;
927 			rdev->config.cayman.max_backends_per_se = 1;
928 		} else {
929 			rdev->config.cayman.max_simds_per_se = 2;
930 			rdev->config.cayman.max_backends_per_se = 1;
931 		}
932 		rdev->config.cayman.max_texture_channel_caches = 2;
933 		rdev->config.cayman.max_gprs = 256;
934 		rdev->config.cayman.max_threads = 256;
935 		rdev->config.cayman.max_gs_threads = 32;
936 		rdev->config.cayman.max_stack_entries = 512;
937 		rdev->config.cayman.sx_num_of_sets = 8;
938 		rdev->config.cayman.sx_max_export_size = 256;
939 		rdev->config.cayman.sx_max_export_pos_size = 64;
940 		rdev->config.cayman.sx_max_export_smx_size = 192;
941 		rdev->config.cayman.max_hw_contexts = 8;
942 		rdev->config.cayman.sq_num_cf_insts = 2;
943 
944 		rdev->config.cayman.sc_prim_fifo_size = 0x40;
945 		rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
946 		rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
947 		gb_addr_config = ARUBA_GB_ADDR_CONFIG_GOLDEN;
948 		break;
949 	}
950 
951 	/* Initialize HDP */
952 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
953 		WREG32((0x2c14 + j), 0x00000000);
954 		WREG32((0x2c18 + j), 0x00000000);
955 		WREG32((0x2c1c + j), 0x00000000);
956 		WREG32((0x2c20 + j), 0x00000000);
957 		WREG32((0x2c24 + j), 0x00000000);
958 	}
959 
960 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
961 
962 	evergreen_fix_pci_max_read_req_size(rdev);
963 
964 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
965 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
966 
967 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
968 	rdev->config.cayman.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
969 	if (rdev->config.cayman.mem_row_size_in_kb > 4)
970 		rdev->config.cayman.mem_row_size_in_kb = 4;
971 	/* XXX use MC settings? */
972 	rdev->config.cayman.shader_engine_tile_size = 32;
973 	rdev->config.cayman.num_gpus = 1;
974 	rdev->config.cayman.multi_gpu_tile_size = 64;
975 
976 	tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
977 	rdev->config.cayman.num_tile_pipes = (1 << tmp);
978 	tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
979 	rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
980 	tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
981 	rdev->config.cayman.num_shader_engines = tmp + 1;
982 	tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
983 	rdev->config.cayman.num_gpus = tmp + 1;
984 	tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
985 	rdev->config.cayman.multi_gpu_tile_size = 1 << tmp;
986 	tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
987 	rdev->config.cayman.mem_row_size_in_kb = 1 << tmp;
988 
989 
990 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
991 	 * not have bank info, so create a custom tiling dword.
992 	 * bits 3:0   num_pipes
993 	 * bits 7:4   num_banks
994 	 * bits 11:8  group_size
995 	 * bits 15:12 row_size
996 	 */
997 	rdev->config.cayman.tile_config = 0;
998 	switch (rdev->config.cayman.num_tile_pipes) {
999 	case 1:
1000 	default:
1001 		rdev->config.cayman.tile_config |= (0 << 0);
1002 		break;
1003 	case 2:
1004 		rdev->config.cayman.tile_config |= (1 << 0);
1005 		break;
1006 	case 4:
1007 		rdev->config.cayman.tile_config |= (2 << 0);
1008 		break;
1009 	case 8:
1010 		rdev->config.cayman.tile_config |= (3 << 0);
1011 		break;
1012 	}
1013 
1014 	/* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */
1015 	if (rdev->flags & RADEON_IS_IGP)
1016 		rdev->config.cayman.tile_config |= 1 << 4;
1017 	else {
1018 		switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
1019 		case 0: /* four banks */
1020 			rdev->config.cayman.tile_config |= 0 << 4;
1021 			break;
1022 		case 1: /* eight banks */
1023 			rdev->config.cayman.tile_config |= 1 << 4;
1024 			break;
1025 		case 2: /* sixteen banks */
1026 		default:
1027 			rdev->config.cayman.tile_config |= 2 << 4;
1028 			break;
1029 		}
1030 	}
1031 	rdev->config.cayman.tile_config |=
1032 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1033 	rdev->config.cayman.tile_config |=
1034 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1035 
1036 	tmp = 0;
1037 	for (i = (rdev->config.cayman.max_shader_engines - 1); i >= 0; i--) {
1038 		u32 rb_disable_bitmap;
1039 
1040 		WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
1041 		WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
1042 		rb_disable_bitmap = (RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000) >> 16;
1043 		tmp <<= 4;
1044 		tmp |= rb_disable_bitmap;
1045 	}
1046 	/* enabled rb are just the one not disabled :) */
1047 	disabled_rb_mask = tmp;
1048 	tmp = 0;
1049 	for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
1050 		tmp |= (1 << i);
1051 	/* if all the backends are disabled, fix it up here */
1052 	if ((disabled_rb_mask & tmp) == tmp) {
1053 		for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
1054 			disabled_rb_mask &= ~(1 << i);
1055 	}
1056 
1057 	WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
1058 	WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
1059 
1060 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
1061 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1062 	if (ASIC_IS_DCE6(rdev))
1063 		WREG32(DMIF_ADDR_CALC, gb_addr_config);
1064 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1065 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
1066 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
1067 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1068 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1069 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1070 
1071 	if ((rdev->config.cayman.max_backends_per_se == 1) &&
1072 	    (rdev->flags & RADEON_IS_IGP)) {
1073 		if ((disabled_rb_mask & 3) == 1) {
1074 			/* RB0 disabled, RB1 enabled */
1075 			tmp = 0x11111111;
1076 		} else {
1077 			/* RB1 disabled, RB0 enabled */
1078 			tmp = 0x00000000;
1079 		}
1080 	} else {
1081 		tmp = gb_addr_config & NUM_PIPES_MASK;
1082 		tmp = r6xx_remap_render_backend(rdev, tmp,
1083 						rdev->config.cayman.max_backends_per_se *
1084 						rdev->config.cayman.max_shader_engines,
1085 						CAYMAN_MAX_BACKENDS, disabled_rb_mask);
1086 	}
1087 	WREG32(GB_BACKEND_MAP, tmp);
1088 
1089 	cgts_tcc_disable = 0xffff0000;
1090 	for (i = 0; i < rdev->config.cayman.max_texture_channel_caches; i++)
1091 		cgts_tcc_disable &= ~(1 << (16 + i));
1092 	WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
1093 	WREG32(CGTS_SYS_TCC_DISABLE, cgts_tcc_disable);
1094 	WREG32(CGTS_USER_SYS_TCC_DISABLE, cgts_tcc_disable);
1095 	WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);
1096 
1097 	/* reprogram the shader complex */
1098 	cgts_sm_ctrl_reg = RREG32(CGTS_SM_CTRL_REG);
1099 	for (i = 0; i < 16; i++)
1100 		WREG32(CGTS_SM_CTRL_REG, OVERRIDE);
1101 	WREG32(CGTS_SM_CTRL_REG, cgts_sm_ctrl_reg);
1102 
1103 	/* set HW defaults for 3D engine */
1104 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1105 
1106 	sx_debug_1 = RREG32(SX_DEBUG_1);
1107 	sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
1108 	WREG32(SX_DEBUG_1, sx_debug_1);
1109 
1110 	smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
1111 	smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff);
1112 	smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets);
1113 	WREG32(SMX_DC_CTL0, smx_dc_ctl0);
1114 
1115 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE);
1116 
1117 	/* need to be explicitly zero-ed */
1118 	WREG32(VGT_OFFCHIP_LDS_BASE, 0);
1119 	WREG32(SQ_LSTMP_RING_BASE, 0);
1120 	WREG32(SQ_HSTMP_RING_BASE, 0);
1121 	WREG32(SQ_ESTMP_RING_BASE, 0);
1122 	WREG32(SQ_GSTMP_RING_BASE, 0);
1123 	WREG32(SQ_VSTMP_RING_BASE, 0);
1124 	WREG32(SQ_PSTMP_RING_BASE, 0);
1125 
1126 	WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO);
1127 
1128 	WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) |
1129 					POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) |
1130 					SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1)));
1131 
1132 	WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) |
1133 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) |
1134 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size)));
1135 
1136 
1137 	WREG32(VGT_NUM_INSTANCES, 1);
1138 
1139 	WREG32(CP_PERFMON_CNTL, 0);
1140 
1141 	WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) |
1142 				  FETCH_FIFO_HIWATER(0x4) |
1143 				  DONE_FIFO_HIWATER(0xe0) |
1144 				  ALU_UPDATE_FIFO_HIWATER(0x8)));
1145 
1146 	WREG32(SQ_GPR_RESOURCE_MGMT_1, NUM_CLAUSE_TEMP_GPRS(4));
1147 	WREG32(SQ_CONFIG, (VC_ENABLE |
1148 			   EXPORT_SRC_C |
1149 			   GFX_PRIO(0) |
1150 			   CS1_PRIO(0) |
1151 			   CS2_PRIO(1)));
1152 	WREG32(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, DYN_GPR_ENABLE);
1153 
1154 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1155 					  FORCE_EOV_MAX_REZ_CNT(255)));
1156 
1157 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1158 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
1159 
1160 	WREG32(VGT_GS_VERTEX_REUSE, 16);
1161 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1162 
1163 	WREG32(CB_PERF_CTR0_SEL_0, 0);
1164 	WREG32(CB_PERF_CTR0_SEL_1, 0);
1165 	WREG32(CB_PERF_CTR1_SEL_0, 0);
1166 	WREG32(CB_PERF_CTR1_SEL_1, 0);
1167 	WREG32(CB_PERF_CTR2_SEL_0, 0);
1168 	WREG32(CB_PERF_CTR2_SEL_1, 0);
1169 	WREG32(CB_PERF_CTR3_SEL_0, 0);
1170 	WREG32(CB_PERF_CTR3_SEL_1, 0);
1171 
1172 	tmp = RREG32(HDP_MISC_CNTL);
1173 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1174 	WREG32(HDP_MISC_CNTL, tmp);
1175 
1176 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1177 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1178 
1179 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1180 
1181 	udelay(50);
1182 
1183 	/* set clockgating golden values on TN */
1184 	if (rdev->family == CHIP_ARUBA) {
1185 		tmp = RREG32_CG(CG_CGTT_LOCAL_0);
1186 		tmp &= ~0x00380000;
1187 		WREG32_CG(CG_CGTT_LOCAL_0, tmp);
1188                 tmp = RREG32_CG(CG_CGTT_LOCAL_1);
1189 		tmp &= ~0x0e000000;
1190 		WREG32_CG(CG_CGTT_LOCAL_1, tmp);
1191 	}
1192 }
1193 
1194 /*
1195  * GART
1196  */
/*
 * cayman_pcie_gart_tlb_flush - flush the VM TLB after GART table updates
 *
 * Write back the HDP cache first so that CPU-written page-table entries
 * are visible to the GPU, then request a VM TLB invalidate.  The write
 * of 1 to VM_INVALIDATE_REQUEST sets bit 0; per the register layout each
 * of bits 0-7 selects one of VM contexts 0-7.
 */
void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-7 are the VM contexts0-7 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
1205 
/*
 * cayman_pcie_gart_enable - bring up the PCIE GART and the 8 VM contexts
 *
 * Pins the GART page table in VRAM, programs the L1 TLB / L2 cache
 * controls, points VM context 0 at the GTT aperture backed by the GART
 * table, and points contexts 1-7 at the same table as a safe placeholder
 * (they are re-targeted on the fly by the VM code in radeon_gart.c).
 *
 * Returns 0 on success, -EINVAL if no GART table object exists, or the
 * error from radeon_gart_table_vram_pin().
 */
static int cayman_pcie_gart_enable(struct radeon_device *rdev)
{
	int i, r;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	/* make sure the page table is resident in VRAM before pointing HW at it */
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |	/* field at bit 7 — presumably L1 fragment/page size; TODO confirm vs register spec */
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0: maps the GTT range, faults fall back to the dummy page */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* magic register writes — undocumented here; TODO confirm purpose vs register spec */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-7 */
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 8; i++) {
		/* contexts share a register block, stride of one dword (i << 2) */
		WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0);
		WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), rdev->vm_manager.max_pfn);
		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			rdev->gart.table_addr >> 12);
	}

	/* enable context1-7 with full protection-fault reporting */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* invalidate any stale TLB entries now that the table is live */
	cayman_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
1286 
/*
 * cayman_pcie_gart_disable - tear down the PCIE GART
 *
 * Disables all VM contexts, drops the L1 TLB / L2 cache enable bits
 * (while keeping pass-through settings so unmapped accesses don't hang),
 * then unpins the GART table from VRAM.
 */
static void cayman_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
1306 
/*
 * cayman_pcie_gart_fini - final GART teardown
 *
 * Order matters: disable the hardware first, then free the table
 * backing store, then release the generic GART state.
 */
static void cayman_pcie_gart_fini(struct radeon_device *rdev)
{
	cayman_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
1313 
/*
 * cayman_cp_int_cntl_setup - program CP_INT_CNTL for one of the CP rings
 * @ring: ring index (only the low 2 bits are used to select the ring bank)
 * @cp_int_cntl: value to write into the selected ring's CP_INT_CNTL
 *
 * CP_INT_CNTL is banked per ring; SRBM_GFX_CNTL's low 2 bits select which
 * ring's copy the following register access hits.  Note the selector is
 * left pointing at @ring on return.
 */
void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
			      int ring, u32 cp_int_cntl)
{
	/* read-modify-write: clear the 2-bit ring selector, keep the rest */
	u32 srbm_gfx_cntl = RREG32(SRBM_GFX_CNTL) & ~3;

	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl | (ring & 3));
	WREG32(CP_INT_CNTL, cp_int_cntl);
}
1322 
1323 /*
1324  * CP.
1325  */
/*
 * cayman_fence_ring_emit - emit a fence on a CP ring
 *
 * Emits a PM4 stream that (1) zeroes CP_COHER_CNTL2 and issues a
 * SURFACE_SYNC to flush the TC/SH read caches over GART for this vmid,
 * then (2) issues an EVENT_WRITE_EOP that writes fence->seq to the
 * fence driver's GPU address and raises an interrupt.  DATA_SEL(1)/
 * INT_SEL(2) select the write-data source and interrupt behavior —
 * exact encodings per the PM4 packet spec.
 *
 * The packet word order is the wire format; do not reorder.
 */
void cayman_fence_ring_emit(struct radeon_device *rdev,
			    struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart for this vmid */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);	/* sync full address range */
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);	/* fence address low 32 bits */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
1349 
/*
 * cayman_ring_ib_execute - schedule an indirect buffer on a CP ring
 *
 * Forces DX10/11 mode, optionally records the post-IB read pointer in
 * the ring's scratch register (for lockup detection), emits the
 * INDIRECT_BUFFER packet pointing at ib->gpu_addr (tagged with the vm id
 * in bits 24+ of the size dword), and finally flushes the read caches
 * over GART for the IB's vmid.  Packet word order is the wire format.
 */
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];

	/* set to DX10/11 mode */
	radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
	radeon_ring_write(ring, 1);

	if (ring->rptr_save_reg) {
		/* 3 + 4 + 8 = dwords emitted by this function after this
		 * point, i.e. the wptr value once the whole stream is in */
		uint32_t next_rptr = ring->wptr + 3 + 4 + 8;
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, ((ring->rptr_save_reg -
					  PACKET3_SET_CONFIG_REG_START) >> 2));
		radeon_ring_write(ring, next_rptr);
	}

	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* request byte swapping of the IB */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	/* flush read cache over gart for this vmid */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);	/* sync full address range */
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 10); /* poll interval */
}
1386 
1387 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
1388 {
1389 	if (enable)
1390 		WREG32(CP_ME_CNTL, 0);
1391 	else {
1392 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1393 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
1394 		WREG32(SCRATCH_UMSK, 0);
1395 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1396 	}
1397 }
1398 
/*
 * cayman_cp_load_microcode - upload PFP and ME firmware to the CP
 *
 * Halts the CP first, then streams the big-endian firmware images word
 * by word through the PFP ucode and ME RAM data ports, and finally
 * resets all the address/read pointers to 0.
 *
 * Returns 0 on success, -EINVAL if the firmware images are not loaded.
 */
static int cayman_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw)
		return -EINVAL;

	/* the CP must be halted while its ucode is replaced */
	cayman_cp_enable(rdev, false);

	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CAYMAN_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CAYMAN_PM4_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));

	/* rewind the auto-incrementing address registers */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
1425 
/*
 * cayman_cp_start - initialize the CP and emit the clear-state stream
 *
 * First emits a 7-dword ME_INITIALIZE packet on the GFX ring, enables
 * the CP, then emits the golden clear-context state
 * (cayman_default_state, plus 19 dwords of framing/extra packets,
 * matching the ring_lock size).  Only CP ring 0 is initialized here.
 *
 * Returns 0 on success or the radeon_ring_lock() error.
 */
static int cayman_cp_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	r = radeon_ring_lock(rdev, ring, 7);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
	radeon_ring_write(ring, 0x1);
	radeon_ring_write(ring, 0x0);
	radeon_ring_write(ring, rdev->config.cayman.max_hw_contexts - 1);
	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
	radeon_ring_unlock_commit(rdev, ring);

	cayman_cp_enable(rdev, true);

	/* cayman_default_size state dwords + 19 dwords of packets below */
	r = radeon_ring_lock(rdev, ring, cayman_default_size + 19);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	for (i = 0; i < cayman_default_size; i++)
		radeon_ring_write(ring, cayman_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	/* SQ_VTX_BASE_VTX_LOC */
	radeon_ring_write(ring, 0xc0026f00);	/* raw PM4 header; TODO confirm encoding vs PM4 spec */
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);

	/* Clear consts */
	radeon_ring_write(ring, 0xc0036f00);	/* raw PM4 header; TODO confirm encoding vs PM4 spec */
	radeon_ring_write(ring, 0x00000bc4);
	radeon_ring_write(ring, 0xffffffff);
	radeon_ring_write(ring, 0xffffffff);
	radeon_ring_write(ring, 0xffffffff);

	radeon_ring_write(ring, 0xc0026900);
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /*  */
1491 
/*
 * cayman_cp_fini - tear down the GFX CP ring
 *
 * Halts the CP, releases the GFX ring, and frees the scratch register
 * that was reserved for read-pointer saving.
 */
static void cayman_cp_fini(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	cayman_cp_enable(rdev, false);
	radeon_ring_fini(rdev, ring);
	radeon_scratch_free(rdev, ring->rptr_save_reg);
}
1499 
/*
 * cayman_cp_resume - (re)start the three CP rings
 *
 * Soft-resets the CP and dependent blocks, programs writeback and ring
 * buffer registers for CP rings 0-2, resets the read/write pointers,
 * then starts the rings via cayman_cp_start().  Only ring 0 (GFX) is
 * ring-tested and marked ready; CP1/CP2 stay not-ready here.
 *
 * Returns 0 on success or the ring-test error for the GFX ring.
 */
static int cayman_cp_resume(struct radeon_device *rdev)
{
	/* per-ring register tables, indexed in step with ridx[] */
	static const int ridx[] = {
		RADEON_RING_TYPE_GFX_INDEX,
		CAYMAN_RING_TYPE_CP1_INDEX,
		CAYMAN_RING_TYPE_CP2_INDEX
	};
	static const unsigned cp_rb_cntl[] = {
		CP_RB0_CNTL,
		CP_RB1_CNTL,
		CP_RB2_CNTL,
	};
	static const unsigned cp_rb_rptr_addr[] = {
		CP_RB0_RPTR_ADDR,
		CP_RB1_RPTR_ADDR,
		CP_RB2_RPTR_ADDR
	};
	static const unsigned cp_rb_rptr_addr_hi[] = {
		CP_RB0_RPTR_ADDR_HI,
		CP_RB1_RPTR_ADDR_HI,
		CP_RB2_RPTR_ADDR_HI
	};
	static const unsigned cp_rb_base[] = {
		CP_RB0_BASE,
		CP_RB1_BASE,
		CP_RB2_BASE
	};
	struct radeon_ring *ring;
	int i, r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_SH |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET);	/* read back to post the write */
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	WREG32(CP_DEBUG, (1 << 27));	/* debug bit 27; TODO confirm meaning vs register spec */

	/* set the wb address whether it's enabled or not */
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
	WREG32(SCRATCH_UMSK, 0xff);

	for (i = 0; i < 3; ++i) {
		uint32_t rb_cntl;
		uint64_t addr;

		/* Set ring buffer size */
		ring = &rdev->ring[ridx[i]];
		rb_cntl = order_base_2(ring->ring_size / 8);
		rb_cntl |= order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8;
#ifdef __BIG_ENDIAN
		rb_cntl |= BUF_SWAP_32BIT;
#endif
		WREG32(cp_rb_cntl[i], rb_cntl);

		/* set the wb address whether it's enabled or not */
		addr = rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET;
		WREG32(cp_rb_rptr_addr[i], addr & 0xFFFFFFFC);
		WREG32(cp_rb_rptr_addr_hi[i], upper_32_bits(addr) & 0xFF);
	}

	/* set the rb base addr, this causes an internal reset of ALL rings */
	for (i = 0; i < 3; ++i) {
		ring = &rdev->ring[ridx[i]];
		WREG32(cp_rb_base[i], ring->gpu_addr >> 8);
	}

	for (i = 0; i < 3; ++i) {
		/* Initialize the ring buffer's read and write pointers */
		ring = &rdev->ring[ridx[i]];
		WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA);

		ring->rptr = ring->wptr = 0;
		WREG32(ring->rptr_reg, ring->rptr);
		WREG32(ring->wptr_reg, ring->wptr);

		mdelay(1);
		WREG32_P(cp_rb_cntl[i], 0, ~RB_RPTR_WR_ENA);
	}

	/* start the rings */
	cayman_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	/* this only test cp0 */
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}

	return 0;
}
1608 
/*
 * cayman_gpu_check_soft_reset - determine which GPU blocks appear hung
 *
 * Samples the GRBM, DMA, SRBM and VM L2 status registers and builds a
 * RADEON_RESET_* bitmask of blocks reporting busy.  A busy MC is assumed
 * to be genuinely busy (not hung) and is filtered out at the end.
 *
 * Returns the reset mask (0 means nothing looks hung).
 */
u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   SH_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
1686 
/*
 * cayman_gpu_soft_reset - soft-reset the GPU blocks named in @reset_mask
 *
 * Logs the fault state, halts the CP and DMA engines, quiesces the MC,
 * then builds GRBM/SRBM soft-reset masks from @reset_mask and pulses
 * them (assert, delay, deassert — with posting reads in between).
 * Finally restores the MC state.  A no-op when @reset_mask is 0.
 */
static void cayman_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump the VM protection-fault registers for debugging (raw offsets) */
	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14F8));
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14D8));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14FC));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14DC));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}

	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	/* stop the MC so the reset doesn't corrupt in-flight transactions */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE)) {
		/* the whole 3D pipeline resets together */
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_SPI |
			SOFT_RESET_SH |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		srbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* MC reset only on discrete parts */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		/* assert, post, wait, deassert, post */
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same pulse sequence for the SRBM-side blocks */
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
1818 
1819 int cayman_asic_reset(struct radeon_device *rdev)
1820 {
1821 	u32 reset_mask;
1822 
1823 	reset_mask = cayman_gpu_check_soft_reset(rdev);
1824 
1825 	if (reset_mask)
1826 		r600_set_bios_scratch_engine_hung(rdev, true);
1827 
1828 	cayman_gpu_soft_reset(rdev, reset_mask);
1829 
1830 	reset_mask = cayman_gpu_check_soft_reset(rdev);
1831 
1832 	if (!reset_mask)
1833 		r600_set_bios_scratch_engine_hung(rdev, false);
1834 
1835 	return 0;
1836 }
1837 
1838 /**
1839  * cayman_gfx_is_lockup - Check if the GFX engine is locked up
1840  *
1841  * @rdev: radeon_device pointer
1842  * @ring: radeon_ring structure holding ring information
1843  *
1844  * Check if the GFX engine is locked up.
1845  * Returns true if the engine appears to be locked up, false if not.
1846  */
1847 bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1848 {
1849 	u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
1850 
1851 	if (!(reset_mask & (RADEON_RESET_GFX |
1852 			    RADEON_RESET_COMPUTE |
1853 			    RADEON_RESET_CP))) {
1854 		radeon_ring_lockup_update(ring);
1855 		return false;
1856 	}
1857 	/* force CP activities */
1858 	radeon_ring_force_activity(rdev, ring);
1859 	return radeon_ring_test_lockup(rdev, ring);
1860 }
1861 
1862 static int cayman_startup(struct radeon_device *rdev)
1863 {
1864 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
1865 	int r;
1866 
1867 	/* enable pcie gen2 link */
1868 	evergreen_pcie_gen2_enable(rdev);
1869 	/* enable aspm */
1870 	evergreen_program_aspm(rdev);
1871 
1872 	/* scratch needs to be initialized before MC */
1873 	r = r600_vram_scratch_init(rdev);
1874 	if (r)
1875 		return r;
1876 
1877 	evergreen_mc_program(rdev);
1878 
1879 	if (rdev->flags & RADEON_IS_IGP) {
1880 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
1881 			r = ni_init_microcode(rdev);
1882 			if (r) {
1883 				DRM_ERROR("Failed to load firmware!\n");
1884 				return r;
1885 			}
1886 		}
1887 	} else {
1888 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
1889 			r = ni_init_microcode(rdev);
1890 			if (r) {
1891 				DRM_ERROR("Failed to load firmware!\n");
1892 				return r;
1893 			}
1894 		}
1895 
1896 		r = ni_mc_load_microcode(rdev);
1897 		if (r) {
1898 			DRM_ERROR("Failed to load MC firmware!\n");
1899 			return r;
1900 		}
1901 	}
1902 
1903 	r = cayman_pcie_gart_enable(rdev);
1904 	if (r)
1905 		return r;
1906 	cayman_gpu_init(rdev);
1907 
1908 	/* allocate rlc buffers */
1909 	if (rdev->flags & RADEON_IS_IGP) {
1910 		rdev->rlc.reg_list = tn_rlc_save_restore_register_list;
1911 		rdev->rlc.reg_list_size =
1912 			(u32)ARRAY_SIZE(tn_rlc_save_restore_register_list);
1913 		rdev->rlc.cs_data = cayman_cs_data;
1914 		r = sumo_rlc_init(rdev);
1915 		if (r) {
1916 			DRM_ERROR("Failed to init rlc BOs!\n");
1917 			return r;
1918 		}
1919 	}
1920 
1921 	/* allocate wb buffer */
1922 	r = radeon_wb_init(rdev);
1923 	if (r)
1924 		return r;
1925 
1926 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
1927 	if (r) {
1928 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
1929 		return r;
1930 	}
1931 
1932 	r = uvd_v2_2_resume(rdev);
1933 	if (!r) {
1934 		r = radeon_fence_driver_start_ring(rdev,
1935 						   R600_RING_TYPE_UVD_INDEX);
1936 		if (r)
1937 			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
1938 	}
1939 	if (r)
1940 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
1941 
1942 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
1943 	if (r) {
1944 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
1945 		return r;
1946 	}
1947 
1948 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
1949 	if (r) {
1950 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
1951 		return r;
1952 	}
1953 
1954 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
1955 	if (r) {
1956 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
1957 		return r;
1958 	}
1959 
1960 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
1961 	if (r) {
1962 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
1963 		return r;
1964 	}
1965 
1966 	/* Enable IRQ */
1967 	if (!rdev->irq.installed) {
1968 		r = radeon_irq_kms_init(rdev);
1969 		if (r)
1970 			return r;
1971 	}
1972 
1973 	r = r600_irq_init(rdev);
1974 	if (r) {
1975 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
1976 		radeon_irq_kms_fini(rdev);
1977 		return r;
1978 	}
1979 	evergreen_irq_set(rdev);
1980 
1981 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
1982 			     CP_RB0_RPTR, CP_RB0_WPTR,
1983 			     RADEON_CP_PACKET2);
1984 	if (r)
1985 		return r;
1986 
1987 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
1988 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
1989 			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
1990 			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
1991 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
1992 	if (r)
1993 		return r;
1994 
1995 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
1996 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
1997 			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
1998 			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
1999 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
2000 	if (r)
2001 		return r;
2002 
2003 	r = cayman_cp_load_microcode(rdev);
2004 	if (r)
2005 		return r;
2006 	r = cayman_cp_resume(rdev);
2007 	if (r)
2008 		return r;
2009 
2010 	r = cayman_dma_resume(rdev);
2011 	if (r)
2012 		return r;
2013 
2014 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
2015 	if (ring->ring_size) {
2016 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
2017 				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
2018 				     RADEON_CP_PACKET2);
2019 		if (!r)
2020 			r = uvd_v1_0_init(rdev);
2021 		if (r)
2022 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
2023 	}
2024 
2025 	r = radeon_ib_pool_init(rdev);
2026 	if (r) {
2027 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
2028 		return r;
2029 	}
2030 
2031 	r = radeon_vm_manager_init(rdev);
2032 	if (r) {
2033 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
2034 		return r;
2035 	}
2036 
2037 	if (ASIC_IS_DCE6(rdev)) {
2038 		r = dce6_audio_init(rdev);
2039 		if (r)
2040 			return r;
2041 	} else {
2042 		r = r600_audio_init(rdev);
2043 		if (r)
2044 			return r;
2045 	}
2046 
2047 	return 0;
2048 }
2049 
2050 int cayman_resume(struct radeon_device *rdev)
2051 {
2052 	int r;
2053 
2054 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
2055 	 * posting will perform necessary task to bring back GPU into good
2056 	 * shape.
2057 	 */
2058 	/* post card */
2059 	atom_asic_init(rdev->mode_info.atom_context);
2060 
2061 	/* init golden registers */
2062 	ni_init_golden_registers(rdev);
2063 
2064 	rdev->accel_working = true;
2065 	r = cayman_startup(rdev);
2066 	if (r) {
2067 		DRM_ERROR("cayman startup failed on resume\n");
2068 		rdev->accel_working = false;
2069 		return r;
2070 	}
2071 	return r;
2072 }
2073 
/* Quiesce the hardware for suspend.  The teardown order mirrors the
 * bring-up order in reverse: audio, VM manager, CP and DMA engines,
 * UVD, interrupts, writeback, and finally the GART.  Always returns 0.
 */
int cayman_suspend(struct radeon_device *rdev)
{
	/* DCE6 parts (TN) use the newer audio block. */
	if (ASIC_IS_DCE6(rdev))
		dce6_audio_fini(rdev);
	else
		r600_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	/* Halt the command processor and both DMA engines. */
	cayman_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	/* Stop UVD and save its state for resume. */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	evergreen_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cayman_pcie_gart_disable(rdev);
	return 0;
}
2090 
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does pretty much
 * nothing more than call ASIC-specific functions. This should
 * also allow us to remove a bunch of callback functions, like
 * vram_info.
 */
/* One-time driver initialization for Cayman/TN: BIOS discovery, card
 * posting, clock/fence/MC/BO setup, ring allocation and the first
 * cayman_startup() pass.  Returns 0 on success or a negative error code.
 * Note that a cayman_startup() failure only disables acceleration; it is
 * not fatal for init itself.
 */
int cayman_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	ni_init_golden_registers(rdev);
	/* Initialize scratch registers */
	r600_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	/* initialize memory controller */
	r = evergreen_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* GFX ring: 1MB. */
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	/* Two async DMA rings: 64KB each. */
	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	/* UVD ring is only set up when the UVD block initializes. */
	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	/* Interrupt handler ring: 64KB. */
	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cayman_startup(rdev);
	if (r) {
		/* Startup failure is non-fatal: tear down the acceleration
		 * state and continue with acceleration disabled. */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cayman_cp_fini(rdev);
		cayman_dma_fini(rdev);
		r600_irq_fini(rdev);
		if (rdev->flags & RADEON_IS_IGP)
			sumo_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cayman_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 *
	 * We can skip this check for TN, because there is no MC
	 * ucode.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
2202 
/* Full driver teardown for Cayman/TN.  Releases engines, interrupt
 * state, memory managers and firmware in roughly the reverse order of
 * cayman_init()/cayman_startup().
 */
void cayman_fini(struct radeon_device *rdev)
{
	cayman_cp_fini(rdev);
	cayman_dma_fini(rdev);
	r600_irq_fini(rdev);
	/* The RLC save/restore BOs only exist on IGPs (see cayman_startup). */
	if (rdev->flags & RADEON_IS_IGP)
		sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	cayman_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	ni_fini_microcode(rdev);
	/* BIOS copy was allocated in radeon_get_bios(); release it last. */
	kfree(rdev->bios);
	rdev->bios = NULL;
}
2226 
2227 /*
2228  * vm
2229  */
2230 int cayman_vm_init(struct radeon_device *rdev)
2231 {
2232 	/* number of VMs */
2233 	rdev->vm_manager.nvm = 8;
2234 	/* base offset of vram pages */
2235 	if (rdev->flags & RADEON_IS_IGP) {
2236 		u64 tmp = RREG32(FUS_MC_VM_FB_OFFSET);
2237 		tmp <<= 22;
2238 		rdev->vm_manager.vram_base_offset = tmp;
2239 	} else
2240 		rdev->vm_manager.vram_base_offset = 0;
2241 	return 0;
2242 }
2243 
void cayman_vm_fini(struct radeon_device *rdev)
{
	/* Intentionally empty: cayman_vm_init() allocates no resources
	 * that would need to be released here. */
}
2247 
2248 /**
2249  * cayman_vm_decode_fault - print human readable fault info
2250  *
2251  * @rdev: radeon_device pointer
2252  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
2253  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
2254  *
2255  * Print human readable fault information (cayman/TN).
2256  */
2257 void cayman_vm_decode_fault(struct radeon_device *rdev,
2258 			    u32 status, u32 addr)
2259 {
2260 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
2261 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
2262 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
2263 	char *block;
2264 
2265 	switch (mc_id) {
2266 	case 32:
2267 	case 16:
2268 	case 96:
2269 	case 80:
2270 	case 160:
2271 	case 144:
2272 	case 224:
2273 	case 208:
2274 		block = "CB";
2275 		break;
2276 	case 33:
2277 	case 17:
2278 	case 97:
2279 	case 81:
2280 	case 161:
2281 	case 145:
2282 	case 225:
2283 	case 209:
2284 		block = "CB_FMASK";
2285 		break;
2286 	case 34:
2287 	case 18:
2288 	case 98:
2289 	case 82:
2290 	case 162:
2291 	case 146:
2292 	case 226:
2293 	case 210:
2294 		block = "CB_CMASK";
2295 		break;
2296 	case 35:
2297 	case 19:
2298 	case 99:
2299 	case 83:
2300 	case 163:
2301 	case 147:
2302 	case 227:
2303 	case 211:
2304 		block = "CB_IMMED";
2305 		break;
2306 	case 36:
2307 	case 20:
2308 	case 100:
2309 	case 84:
2310 	case 164:
2311 	case 148:
2312 	case 228:
2313 	case 212:
2314 		block = "DB";
2315 		break;
2316 	case 37:
2317 	case 21:
2318 	case 101:
2319 	case 85:
2320 	case 165:
2321 	case 149:
2322 	case 229:
2323 	case 213:
2324 		block = "DB_HTILE";
2325 		break;
2326 	case 38:
2327 	case 22:
2328 	case 102:
2329 	case 86:
2330 	case 166:
2331 	case 150:
2332 	case 230:
2333 	case 214:
2334 		block = "SX";
2335 		break;
2336 	case 39:
2337 	case 23:
2338 	case 103:
2339 	case 87:
2340 	case 167:
2341 	case 151:
2342 	case 231:
2343 	case 215:
2344 		block = "DB_STEN";
2345 		break;
2346 	case 40:
2347 	case 24:
2348 	case 104:
2349 	case 88:
2350 	case 232:
2351 	case 216:
2352 	case 168:
2353 	case 152:
2354 		block = "TC_TFETCH";
2355 		break;
2356 	case 41:
2357 	case 25:
2358 	case 105:
2359 	case 89:
2360 	case 233:
2361 	case 217:
2362 	case 169:
2363 	case 153:
2364 		block = "TC_VFETCH";
2365 		break;
2366 	case 42:
2367 	case 26:
2368 	case 106:
2369 	case 90:
2370 	case 234:
2371 	case 218:
2372 	case 170:
2373 	case 154:
2374 		block = "VC";
2375 		break;
2376 	case 112:
2377 		block = "CP";
2378 		break;
2379 	case 113:
2380 	case 114:
2381 		block = "SH";
2382 		break;
2383 	case 115:
2384 		block = "VGT";
2385 		break;
2386 	case 178:
2387 		block = "IH";
2388 		break;
2389 	case 51:
2390 		block = "RLC";
2391 		break;
2392 	case 55:
2393 		block = "DMA";
2394 		break;
2395 	case 56:
2396 		block = "HDP";
2397 		break;
2398 	default:
2399 		block = "unknown";
2400 		break;
2401 	}
2402 
2403 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
2404 	       protections, vmid, addr,
2405 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
2406 	       block, mc_id);
2407 }
2408 
2409 #define R600_ENTRY_VALID   (1 << 0)
2410 #define R600_PTE_SYSTEM    (1 << 1)
2411 #define R600_PTE_SNOOPED   (1 << 2)
2412 #define R600_PTE_READABLE  (1 << 5)
2413 #define R600_PTE_WRITEABLE (1 << 6)
2414 
2415 uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags)
2416 {
2417 	uint32_t r600_flags = 0;
2418 	r600_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_ENTRY_VALID : 0;
2419 	r600_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
2420 	r600_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
2421 	if (flags & RADEON_VM_PAGE_SYSTEM) {
2422 		r600_flags |= R600_PTE_SYSTEM;
2423 		r600_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
2424 	}
2425 	return r600_flags;
2426 }
2427 
2428 /**
2429  * cayman_vm_set_page - update the page tables using the CP
2430  *
2431  * @rdev: radeon_device pointer
2432  * @ib: indirect buffer to fill with commands
2433  * @pe: addr of the page entry
2434  * @addr: dst addr to write into pe
2435  * @count: number of page entries to update
2436  * @incr: increase next addr by incr bytes
2437  * @flags: access flags
2438  *
2439  * Update the page tables using the CP (cayman/TN).
2440  */
2441 void cayman_vm_set_page(struct radeon_device *rdev,
2442 			struct radeon_ib *ib,
2443 			uint64_t pe,
2444 			uint64_t addr, unsigned count,
2445 			uint32_t incr, uint32_t flags)
2446 {
2447 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
2448 	uint64_t value;
2449 	unsigned ndw;
2450 
2451 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
2452 		while (count) {
2453 			ndw = 1 + count * 2;
2454 			if (ndw > 0x3FFF)
2455 				ndw = 0x3FFF;
2456 
2457 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw);
2458 			ib->ptr[ib->length_dw++] = pe;
2459 			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2460 			for (; ndw > 1; ndw -= 2, --count, pe += 8) {
2461 				if (flags & RADEON_VM_PAGE_SYSTEM) {
2462 					value = radeon_vm_map_gart(rdev, addr);
2463 					value &= 0xFFFFFFFFFFFFF000ULL;
2464 				} else if (flags & RADEON_VM_PAGE_VALID) {
2465 					value = addr;
2466 				} else {
2467 					value = 0;
2468 				}
2469 				addr += incr;
2470 				value |= r600_flags;
2471 				ib->ptr[ib->length_dw++] = value;
2472 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
2473 			}
2474 		}
2475 	} else {
2476 		cayman_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
2477 	}
2478 }
2479 
2480 /**
2481  * cayman_vm_flush - vm flush using the CP
2482  *
2483  * @rdev: radeon_device pointer
2484  *
2485  * Update the page table base and flush the VM TLB
2486  * using the CP (cayman-si).
2487  */
2488 void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2489 {
2490 	struct radeon_ring *ring = &rdev->ring[ridx];
2491 
2492 	if (vm == NULL)
2493 		return;
2494 
2495 	radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
2496 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
2497 
2498 	/* flush hdp cache */
2499 	radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
2500 	radeon_ring_write(ring, 0x1);
2501 
2502 	/* bits 0-7 are the VM contexts0-7 */
2503 	radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
2504 	radeon_ring_write(ring, 1 << vm->id);
2505 
2506 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
2507 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
2508 	radeon_ring_write(ring, 0x0);
2509 }
2510