xref: /dragonfly/sys/dev/drm/radeon/ni.c (revision be09fc23)
1 /*
2  * Copyright 2010 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include <uapi_drm/radeon_drm.h>
30 #include "nid.h"
31 #include "atom.h"
32 #include "ni_reg.h"
33 #include "cayman_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_cayman.h"
36 
/* MMIO register offsets the RLC saves/restores across power transitions on
 * TN/ARUBA (Trinity) parts; hooked up via rdev->rlc.reg_list with the
 * element count carried by the *_size variable below. */
37 static u32 tn_rlc_save_restore_register_list[] =
38 {
39 	0x98fc,
40 	0x98f0,
41 	0x9834,
42 	0x9838,
43 	0x9870,
44 	0x9874,
45 	0x8a14,
46 	0x8b24,
47 	0x8bcc,
48 	0x8b10,
49 	0x8c30,
50 	0x8d00,
51 	0x8d04,
52 	0x8c00,
53 	0x8c04,
54 	0x8c10,
55 	0x8c14,
56 	0x8d8c,
57 	0x8cf0,
58 	0x8e38,
59 	0x9508,
60 	0x9688,
61 	0x9608,
62 	0x960c,
63 	0x9610,
64 	0x9614,
65 	0x88c4,
66 	0x8978,
67 	0x88d4,
68 	0x900c,
69 	0x9100,
70 	0x913c,
71 	0x90e8,
72 	0x9354,
73 	0xa008,
74 	0x98f8,
75 	0x9148,
76 	0x914c,
77 	0x3f94,
78 	0x98f4,
79 	0x9b7c,
80 	0x3f8c,
81 	0x8950,
82 	0x8954,
83 	0x8a18,
84 	0x8b28,
85 	0x9144,
86 	0x3f90,
87 	0x915c,
88 	0x9160,
89 	0x9178,
90 	0x917c,
91 	0x9180,
92 	0x918c,
93 	0x9190,
94 	0x9194,
95 	0x9198,
96 	0x919c,
97 	0x91a8,
98 	0x91ac,
99 	0x91b0,
100 	0x91b4,
101 	0x91b8,
102 	0x91c4,
103 	0x91c8,
104 	0x91cc,
105 	0x91d0,
106 	0x91d4,
107 	0x91e0,
108 	0x91e4,
109 	0x91ec,
110 	0x91f0,
111 	0x91f4,
112 	0x9200,
113 	0x9204,
114 	0x929c,
115 	0x8030,
116 	0x9150,
117 	0x9a60,
118 	0x920c,
119 	0x9210,
120 	0x9228,
121 	0x922c,
122 	0x9244,
123 	0x9248,
124 	0x91e8,
125 	0x9294,
126 	0x9208,
127 	0x9224,
128 	0x9240,
129 	0x9220,
130 	0x923c,
131 	0x9258,
132 	0x9744,
133 	0xa200,
134 	0xa204,
135 	0xa208,
136 	0xa20c,
137 	0x8d58,
138 	0x9030,
139 	0x9034,
140 	0x9038,
141 	0x903c,
142 	0x9040,
143 	0x9654,
144 	0x897c,
145 	0xa210,
146 	0xa214,
147 	0x9868,
148 	0xa02c,
149 	0x9664,
150 	0x9698,
151 	0x949c,
152 	0x8e10,
153 	0x8e18,
154 	0x8c50,
155 	0x8c58,
156 	0x8c60,
157 	0x8c68,
158 	0x89b4,
159 	0x9830,
160 	0x802c,
161 };
/* Number of entries in the save/restore list above. */
162 static u32 tn_rlc_save_restore_register_list_size = ARRAY_SIZE(tn_rlc_save_restore_register_list);
163 
/* Firmware Names
 * Images fetched via the firmware loader per NI family:
 * pfp/me = CP (command processor), mc = memory controller,
 * rlc = run list controller, smc = power/thermal controller. */
164 /* Firmware Names */
165 MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
166 MODULE_FIRMWARE("radeon/BARTS_me.bin");
167 MODULE_FIRMWARE("radeon/BARTS_mc.bin");
168 MODULE_FIRMWARE("radeon/BARTS_smc.bin");
169 MODULE_FIRMWARE("radeon/BTC_rlc.bin");
170 MODULE_FIRMWARE("radeon/TURKS_pfp.bin");
171 MODULE_FIRMWARE("radeon/TURKS_me.bin");
172 MODULE_FIRMWARE("radeon/TURKS_mc.bin");
173 MODULE_FIRMWARE("radeon/TURKS_smc.bin");
174 MODULE_FIRMWARE("radeon/CAICOS_pfp.bin");
175 MODULE_FIRMWARE("radeon/CAICOS_me.bin");
176 MODULE_FIRMWARE("radeon/CAICOS_mc.bin");
177 MODULE_FIRMWARE("radeon/CAICOS_smc.bin");
178 MODULE_FIRMWARE("radeon/CAYMAN_pfp.bin");
179 MODULE_FIRMWARE("radeon/CAYMAN_me.bin");
180 MODULE_FIRMWARE("radeon/CAYMAN_mc.bin");
181 MODULE_FIRMWARE("radeon/CAYMAN_rlc.bin");
182 MODULE_FIRMWARE("radeon/CAYMAN_smc.bin");
183 MODULE_FIRMWARE("radeon/ARUBA_pfp.bin");
184 MODULE_FIRMWARE("radeon/ARUBA_me.bin");
185 MODULE_FIRMWARE("radeon/ARUBA_rlc.bin");
186 
187 
/* "Golden" register triplets: { MMIO offset, AND mask (bits preserved),
 * OR value }, applied by radeon_program_register_sequence() from
 * ni_init_golden_registers() below. */
188 static const u32 cayman_golden_registers2[] =
189 {
190 	0x3e5c, 0xffffffff, 0x00000000,
191 	0x3e48, 0xffffffff, 0x00000000,
192 	0x3e4c, 0xffffffff, 0x00000000,
193 	0x3e64, 0xffffffff, 0x00000000,
194 	0x3e50, 0xffffffff, 0x00000000,
195 	0x3e60, 0xffffffff, 0x00000000
196 };
197 
/* CAYMAN golden settings: { offset, AND mask, OR value } triplets for
 * radeon_program_register_sequence(). */
198 static const u32 cayman_golden_registers[] =
199 {
200 	0x5eb4, 0xffffffff, 0x00000002,
201 	0x5e78, 0x8f311ff1, 0x001000f0,
202 	0x3f90, 0xffff0000, 0xff000000,
203 	0x9148, 0xffff0000, 0xff000000,
204 	0x3f94, 0xffff0000, 0xff000000,
205 	0x914c, 0xffff0000, 0xff000000,
206 	0xc78, 0x00000080, 0x00000080,
207 	0xbd4, 0x70073777, 0x00011003,
208 	0xd02c, 0xbfffff1f, 0x08421000,
209 	0xd0b8, 0x73773777, 0x02011003,
210 	0x5bc0, 0x00200000, 0x50100000,
211 	0x98f8, 0x33773777, 0x02011003,
212 	0x98fc, 0xffffffff, 0x76541032,
213 	0x7030, 0x31000311, 0x00000011,
214 	0x2f48, 0x33773777, 0x42010001,
215 	0x6b28, 0x00000010, 0x00000012,
216 	0x7728, 0x00000010, 0x00000012,
217 	0x10328, 0x00000010, 0x00000012,
218 	0x10f28, 0x00000010, 0x00000012,
219 	0x11b28, 0x00000010, 0x00000012,
220 	0x12728, 0x00000010, 0x00000012,
221 	0x240c, 0x000007ff, 0x00000000,
222 	0x8a14, 0xf000001f, 0x00000007,
223 	0x8b24, 0x3fff3fff, 0x00ff0fff,
224 	0x8b10, 0x0000ff0f, 0x00000000,
225 	0x28a4c, 0x07ffffff, 0x06000000,
226 	0x10c, 0x00000001, 0x00010003,
227 	0xa02c, 0xffffffff, 0x0000009b,
228 	0x913c, 0x0000010f, 0x01000100,
229 	0x8c04, 0xf8ff00ff, 0x40600060,
230 	0x28350, 0x00000f01, 0x00000000,
231 	0x9508, 0x3700001f, 0x00000002,
232 	0x960c, 0xffffffff, 0x54763210,
233 	0x88c4, 0x001f3ae3, 0x00000082,
234 	0x88d0, 0xffffffff, 0x0f40df40,
235 	0x88d4, 0x0000001f, 0x00000010,
236 	0x8974, 0xffffffff, 0x00000000
237 };
238 
/* ARUBA/DVST second-stage golden triplets, applied after the main table
 * (shared by both ARUBA branches in ni_init_golden_registers()). */
239 static const u32 dvst_golden_registers2[] =
240 {
241 	0x8f8, 0xffffffff, 0,
242 	0x8fc, 0x00380000, 0,
243 	0x8f8, 0xffffffff, 1,
244 	0x8fc, 0x0e000000, 0
245 };
246 
/* Golden triplets { offset, AND mask, OR value } for the DVST-class
 * ARUBA (Trinity) PCI IDs — see ni_init_golden_registers(). */
247 static const u32 dvst_golden_registers[] =
248 {
249 	0x690, 0x3fff3fff, 0x20c00033,
250 	0x918c, 0x0fff0fff, 0x00010006,
251 	0x91a8, 0x0fff0fff, 0x00010006,
252 	0x9150, 0xffffdfff, 0x6e944040,
253 	0x917c, 0x0fff0fff, 0x00030002,
254 	0x9198, 0x0fff0fff, 0x00030002,
255 	0x915c, 0x0fff0fff, 0x00010000,
256 	0x3f90, 0xffff0001, 0xff000000,
257 	0x9178, 0x0fff0fff, 0x00070000,
258 	0x9194, 0x0fff0fff, 0x00070000,
259 	0x9148, 0xffff0001, 0xff000000,
260 	0x9190, 0x0fff0fff, 0x00090008,
261 	0x91ac, 0x0fff0fff, 0x00090008,
262 	0x3f94, 0xffff0000, 0xff000000,
263 	0x914c, 0xffff0000, 0xff000000,
264 	0x929c, 0x00000fff, 0x00000001,
265 	0x55e4, 0xff607fff, 0xfc000100,
266 	0x8a18, 0xff000fff, 0x00000100,
267 	0x8b28, 0xff000fff, 0x00000100,
268 	0x9144, 0xfffc0fff, 0x00000100,
269 	0x6ed8, 0x00010101, 0x00010000,
270 	0x9830, 0xffffffff, 0x00000000,
271 	0x9834, 0xf00fffff, 0x00000400,
272 	0x9838, 0xfffffffe, 0x00000000,
273 	0xd0c0, 0xff000fff, 0x00000100,
274 	0xd02c, 0xbfffff1f, 0x08421000,
275 	0xd0b8, 0x73773777, 0x12010001,
276 	0x5bb0, 0x000000f0, 0x00000070,
277 	0x98f8, 0x73773777, 0x12010001,
278 	0x98fc, 0xffffffff, 0x00000010,
279 	0x9b7c, 0x00ff0000, 0x00fc0000,
280 	0x8030, 0x00001f0f, 0x0000100a,
281 	0x2f48, 0x73773777, 0x12010001,
282 	0x2408, 0x00030000, 0x000c007f,
283 	0x8a14, 0xf000003f, 0x00000007,
284 	0x8b24, 0x3fff3fff, 0x00ff0fff,
285 	0x8b10, 0x0000ff0f, 0x00000000,
286 	0x28a4c, 0x07ffffff, 0x06000000,
287 	0x4d8, 0x00000fff, 0x00000100,
288 	0xa008, 0xffffffff, 0x00010000,
289 	0x913c, 0xffff03ff, 0x01000100,
290 	0x8c00, 0x000000ff, 0x00000003,
291 	0x8c04, 0xf8ff00ff, 0x40600060,
292 	0x8cf0, 0x1fff1fff, 0x08e00410,
293 	0x28350, 0x00000f01, 0x00000000,
294 	0x9508, 0xf700071f, 0x00000002,
295 	0x960c, 0xffffffff, 0x54763210,
296 	0x20ef8, 0x01ff01ff, 0x00000002,
297 	0x20e98, 0xfffffbff, 0x00200000,
298 	0x2015c, 0xffffffff, 0x00000f40,
299 	0x88c4, 0x001f3ae3, 0x00000082,
300 	0x8978, 0x3fffffff, 0x04050140,
301 	0x88d4, 0x0000001f, 0x00000010,
302 	0x8974, 0xffffffff, 0x00000000
303 };
304 
/* Golden triplets for the non-DVST ("scrapper") ARUBA variants.  Many
 * entries appear twice back-to-back; kept verbatim — presumably written
 * once per shader engine by the programming sequence (NOTE(review):
 * intent not visible here, do not dedupe without hardware docs). */
305 static const u32 scrapper_golden_registers[] =
306 {
307 	0x690, 0x3fff3fff, 0x20c00033,
308 	0x918c, 0x0fff0fff, 0x00010006,
309 	0x918c, 0x0fff0fff, 0x00010006,
310 	0x91a8, 0x0fff0fff, 0x00010006,
311 	0x91a8, 0x0fff0fff, 0x00010006,
312 	0x9150, 0xffffdfff, 0x6e944040,
313 	0x9150, 0xffffdfff, 0x6e944040,
314 	0x917c, 0x0fff0fff, 0x00030002,
315 	0x917c, 0x0fff0fff, 0x00030002,
316 	0x9198, 0x0fff0fff, 0x00030002,
317 	0x9198, 0x0fff0fff, 0x00030002,
318 	0x915c, 0x0fff0fff, 0x00010000,
319 	0x915c, 0x0fff0fff, 0x00010000,
320 	0x3f90, 0xffff0001, 0xff000000,
321 	0x3f90, 0xffff0001, 0xff000000,
322 	0x9178, 0x0fff0fff, 0x00070000,
323 	0x9178, 0x0fff0fff, 0x00070000,
324 	0x9194, 0x0fff0fff, 0x00070000,
325 	0x9194, 0x0fff0fff, 0x00070000,
326 	0x9148, 0xffff0001, 0xff000000,
327 	0x9148, 0xffff0001, 0xff000000,
328 	0x9190, 0x0fff0fff, 0x00090008,
329 	0x9190, 0x0fff0fff, 0x00090008,
330 	0x91ac, 0x0fff0fff, 0x00090008,
331 	0x91ac, 0x0fff0fff, 0x00090008,
332 	0x3f94, 0xffff0000, 0xff000000,
333 	0x3f94, 0xffff0000, 0xff000000,
334 	0x914c, 0xffff0000, 0xff000000,
335 	0x914c, 0xffff0000, 0xff000000,
336 	0x929c, 0x00000fff, 0x00000001,
337 	0x929c, 0x00000fff, 0x00000001,
338 	0x55e4, 0xff607fff, 0xfc000100,
339 	0x8a18, 0xff000fff, 0x00000100,
340 	0x8a18, 0xff000fff, 0x00000100,
341 	0x8b28, 0xff000fff, 0x00000100,
342 	0x8b28, 0xff000fff, 0x00000100,
343 	0x9144, 0xfffc0fff, 0x00000100,
344 	0x9144, 0xfffc0fff, 0x00000100,
345 	0x6ed8, 0x00010101, 0x00010000,
346 	0x9830, 0xffffffff, 0x00000000,
347 	0x9830, 0xffffffff, 0x00000000,
348 	0x9834, 0xf00fffff, 0x00000400,
349 	0x9834, 0xf00fffff, 0x00000400,
350 	0x9838, 0xfffffffe, 0x00000000,
351 	0x9838, 0xfffffffe, 0x00000000,
352 	0xd0c0, 0xff000fff, 0x00000100,
353 	0xd02c, 0xbfffff1f, 0x08421000,
354 	0xd02c, 0xbfffff1f, 0x08421000,
355 	0xd0b8, 0x73773777, 0x12010001,
356 	0xd0b8, 0x73773777, 0x12010001,
357 	0x5bb0, 0x000000f0, 0x00000070,
358 	0x98f8, 0x73773777, 0x12010001,
359 	0x98f8, 0x73773777, 0x12010001,
360 	0x98fc, 0xffffffff, 0x00000010,
361 	0x98fc, 0xffffffff, 0x00000010,
362 	0x9b7c, 0x00ff0000, 0x00fc0000,
363 	0x9b7c, 0x00ff0000, 0x00fc0000,
364 	0x8030, 0x00001f0f, 0x0000100a,
365 	0x8030, 0x00001f0f, 0x0000100a,
366 	0x2f48, 0x73773777, 0x12010001,
367 	0x2f48, 0x73773777, 0x12010001,
368 	0x2408, 0x00030000, 0x000c007f,
369 	0x8a14, 0xf000003f, 0x00000007,
370 	0x8a14, 0xf000003f, 0x00000007,
371 	0x8b24, 0x3fff3fff, 0x00ff0fff,
372 	0x8b24, 0x3fff3fff, 0x00ff0fff,
373 	0x8b10, 0x0000ff0f, 0x00000000,
374 	0x8b10, 0x0000ff0f, 0x00000000,
375 	0x28a4c, 0x07ffffff, 0x06000000,
376 	0x28a4c, 0x07ffffff, 0x06000000,
377 	0x4d8, 0x00000fff, 0x00000100,
378 	0x4d8, 0x00000fff, 0x00000100,
379 	0xa008, 0xffffffff, 0x00010000,
380 	0xa008, 0xffffffff, 0x00010000,
381 	0x913c, 0xffff03ff, 0x01000100,
382 	0x913c, 0xffff03ff, 0x01000100,
383 	0x90e8, 0x001fffff, 0x010400c0,
384 	0x8c00, 0x000000ff, 0x00000003,
385 	0x8c00, 0x000000ff, 0x00000003,
386 	0x8c04, 0xf8ff00ff, 0x40600060,
387 	0x8c04, 0xf8ff00ff, 0x40600060,
388 	0x8c30, 0x0000000f, 0x00040005,
389 	0x8cf0, 0x1fff1fff, 0x08e00410,
390 	0x8cf0, 0x1fff1fff, 0x08e00410,
391 	0x900c, 0x00ffffff, 0x0017071f,
392 	0x28350, 0x00000f01, 0x00000000,
393 	0x28350, 0x00000f01, 0x00000000,
394 	0x9508, 0xf700071f, 0x00000002,
395 	0x9508, 0xf700071f, 0x00000002,
396 	0x9688, 0x00300000, 0x0017000f,
397 	0x960c, 0xffffffff, 0x54763210,
398 	0x960c, 0xffffffff, 0x54763210,
399 	0x20ef8, 0x01ff01ff, 0x00000002,
400 	0x20e98, 0xfffffbff, 0x00200000,
401 	0x2015c, 0xffffffff, 0x00000f40,
402 	0x88c4, 0x001f3ae3, 0x00000082,
403 	0x88c4, 0x001f3ae3, 0x00000082,
404 	0x8978, 0x3fffffff, 0x04050140,
405 	0x8978, 0x3fffffff, 0x04050140,
406 	0x88d4, 0x0000001f, 0x00000010,
407 	0x88d4, 0x0000001f, 0x00000010,
408 	0x8974, 0xffffffff, 0x00000000,
409 	0x8974, 0xffffffff, 0x00000000
410 };
411 
412 static void ni_init_golden_registers(struct radeon_device *rdev)
413 {
414 	switch (rdev->family) {
415 	case CHIP_CAYMAN:
416 		radeon_program_register_sequence(rdev,
417 						 cayman_golden_registers,
418 						 (const u32)ARRAY_SIZE(cayman_golden_registers));
419 		radeon_program_register_sequence(rdev,
420 						 cayman_golden_registers2,
421 						 (const u32)ARRAY_SIZE(cayman_golden_registers2));
422 		break;
423 	case CHIP_ARUBA:
424 		if ((rdev->ddev->pci_device == 0x9900) ||
425 		    (rdev->ddev->pci_device == 0x9901) ||
426 		    (rdev->ddev->pci_device == 0x9903) ||
427 		    (rdev->ddev->pci_device == 0x9904) ||
428 		    (rdev->ddev->pci_device == 0x9905) ||
429 		    (rdev->ddev->pci_device == 0x9906) ||
430 		    (rdev->ddev->pci_device == 0x9907) ||
431 		    (rdev->ddev->pci_device == 0x9908) ||
432 		    (rdev->ddev->pci_device == 0x9909) ||
433 		    (rdev->ddev->pci_device == 0x990A) ||
434 		    (rdev->ddev->pci_device == 0x990B) ||
435 		    (rdev->ddev->pci_device == 0x990C) ||
436 		    (rdev->ddev->pci_device == 0x990D) ||
437 		    (rdev->ddev->pci_device == 0x990E) ||
438 		    (rdev->ddev->pci_device == 0x990F) ||
439 		    (rdev->ddev->pci_device == 0x9910) ||
440 		    (rdev->ddev->pci_device == 0x9913) ||
441 		    (rdev->ddev->pci_device == 0x9917) ||
442 		    (rdev->ddev->pci_device == 0x9918)) {
443 			radeon_program_register_sequence(rdev,
444 							 dvst_golden_registers,
445 							 (const u32)ARRAY_SIZE(dvst_golden_registers));
446 			radeon_program_register_sequence(rdev,
447 							 dvst_golden_registers2,
448 							 (const u32)ARRAY_SIZE(dvst_golden_registers2));
449 		} else {
450 			radeon_program_register_sequence(rdev,
451 							 scrapper_golden_registers,
452 							 (const u32)ARRAY_SIZE(scrapper_golden_registers));
453 			radeon_program_register_sequence(rdev,
454 							 dvst_golden_registers2,
455 							 (const u32)ARRAY_SIZE(dvst_golden_registers2));
456 		}
457 		break;
458 	default:
459 		break;
460 	}
461 }
462 
463 #define BTC_IO_MC_REGS_SIZE 29
464 
/* BARTS MC sequencer IO debug { index, data } pairs written through
 * MC_SEQ_IO_DEBUG_INDEX/DATA before the MC ucode upload. */
465 static const u32 barts_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
466 	{0x00000077, 0xff010100},
467 	{0x00000078, 0x00000000},
468 	{0x00000079, 0x00001434},
469 	{0x0000007a, 0xcc08ec08},
470 	{0x0000007b, 0x00040000},
471 	{0x0000007c, 0x000080c0},
472 	{0x0000007d, 0x09000000},
473 	{0x0000007e, 0x00210404},
474 	{0x00000081, 0x08a8e800},
475 	{0x00000082, 0x00030444},
476 	{0x00000083, 0x00000000},
477 	{0x00000085, 0x00000001},
478 	{0x00000086, 0x00000002},
479 	{0x00000087, 0x48490000},
480 	{0x00000088, 0x20244647},
481 	{0x00000089, 0x00000005},
482 	{0x0000008b, 0x66030000},
483 	{0x0000008c, 0x00006603},
484 	{0x0000008d, 0x00000100},
485 	{0x0000008f, 0x00001c0a},
486 	{0x00000090, 0xff000001},
487 	{0x00000094, 0x00101101},
488 	{0x00000095, 0x00000fff},
489 	{0x00000096, 0x00116fff},
490 	{0x00000097, 0x60010000},
491 	{0x00000098, 0x10010000},
492 	{0x00000099, 0x00006000},
493 	{0x0000009a, 0x00001000},
494 	{0x0000009f, 0x00946a00}
495 };
496 
/* TURKS MC sequencer IO pairs — identical to barts_io_mc_regs except the
 * final 0x9f entry. */
497 static const u32 turks_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
498 	{0x00000077, 0xff010100},
499 	{0x00000078, 0x00000000},
500 	{0x00000079, 0x00001434},
501 	{0x0000007a, 0xcc08ec08},
502 	{0x0000007b, 0x00040000},
503 	{0x0000007c, 0x000080c0},
504 	{0x0000007d, 0x09000000},
505 	{0x0000007e, 0x00210404},
506 	{0x00000081, 0x08a8e800},
507 	{0x00000082, 0x00030444},
508 	{0x00000083, 0x00000000},
509 	{0x00000085, 0x00000001},
510 	{0x00000086, 0x00000002},
511 	{0x00000087, 0x48490000},
512 	{0x00000088, 0x20244647},
513 	{0x00000089, 0x00000005},
514 	{0x0000008b, 0x66030000},
515 	{0x0000008c, 0x00006603},
516 	{0x0000008d, 0x00000100},
517 	{0x0000008f, 0x00001c0a},
518 	{0x00000090, 0xff000001},
519 	{0x00000094, 0x00101101},
520 	{0x00000095, 0x00000fff},
521 	{0x00000096, 0x00116fff},
522 	{0x00000097, 0x60010000},
523 	{0x00000098, 0x10010000},
524 	{0x00000099, 0x00006000},
525 	{0x0000009a, 0x00001000},
526 	{0x0000009f, 0x00936a00}
527 };
528 
/* CAICOS MC sequencer IO pairs — identical to barts_io_mc_regs except the
 * final 0x9f entry. */
529 static const u32 caicos_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
530 	{0x00000077, 0xff010100},
531 	{0x00000078, 0x00000000},
532 	{0x00000079, 0x00001434},
533 	{0x0000007a, 0xcc08ec08},
534 	{0x0000007b, 0x00040000},
535 	{0x0000007c, 0x000080c0},
536 	{0x0000007d, 0x09000000},
537 	{0x0000007e, 0x00210404},
538 	{0x00000081, 0x08a8e800},
539 	{0x00000082, 0x00030444},
540 	{0x00000083, 0x00000000},
541 	{0x00000085, 0x00000001},
542 	{0x00000086, 0x00000002},
543 	{0x00000087, 0x48490000},
544 	{0x00000088, 0x20244647},
545 	{0x00000089, 0x00000005},
546 	{0x0000008b, 0x66030000},
547 	{0x0000008c, 0x00006603},
548 	{0x0000008d, 0x00000100},
549 	{0x0000008f, 0x00001c0a},
550 	{0x00000090, 0xff000001},
551 	{0x00000094, 0x00101101},
552 	{0x00000095, 0x00000fff},
553 	{0x00000096, 0x00116fff},
554 	{0x00000097, 0x60010000},
555 	{0x00000098, 0x10010000},
556 	{0x00000099, 0x00006000},
557 	{0x0000009a, 0x00001000},
558 	{0x0000009f, 0x00916a00}
559 };
560 
/* CAYMAN MC sequencer IO pairs — identical to barts_io_mc_regs except the
 * final 0x9f entry. */
561 static const u32 cayman_io_mc_regs[BTC_IO_MC_REGS_SIZE][2] = {
562 	{0x00000077, 0xff010100},
563 	{0x00000078, 0x00000000},
564 	{0x00000079, 0x00001434},
565 	{0x0000007a, 0xcc08ec08},
566 	{0x0000007b, 0x00040000},
567 	{0x0000007c, 0x000080c0},
568 	{0x0000007d, 0x09000000},
569 	{0x0000007e, 0x00210404},
570 	{0x00000081, 0x08a8e800},
571 	{0x00000082, 0x00030444},
572 	{0x00000083, 0x00000000},
573 	{0x00000085, 0x00000001},
574 	{0x00000086, 0x00000002},
575 	{0x00000087, 0x48490000},
576 	{0x00000088, 0x20244647},
577 	{0x00000089, 0x00000005},
578 	{0x0000008b, 0x66030000},
579 	{0x0000008c, 0x00006603},
580 	{0x0000008d, 0x00000100},
581 	{0x0000008f, 0x00001c0a},
582 	{0x00000090, 0xff000001},
583 	{0x00000094, 0x00101101},
584 	{0x00000095, 0x00000fff},
585 	{0x00000096, 0x00116fff},
586 	{0x00000097, 0x60010000},
587 	{0x00000098, 0x10010000},
588 	{0x00000099, 0x00006000},
589 	{0x0000009a, 0x00001000},
590 	{0x0000009f, 0x00976b00}
591 };
592 
593 int ni_mc_load_microcode(struct radeon_device *rdev)
594 {
595 	const __be32 *fw_data;
596 	u32 mem_type, running, blackout = 0;
597 	u32 *io_mc_regs;
598 	int i, ucode_size, regs_size;
599 
600 	if (!rdev->mc_fw)
601 		return -EINVAL;
602 
603 	switch (rdev->family) {
604 	case CHIP_BARTS:
605 		io_mc_regs = (u32 *)&barts_io_mc_regs;
606 		ucode_size = BTC_MC_UCODE_SIZE;
607 		regs_size = BTC_IO_MC_REGS_SIZE;
608 		break;
609 	case CHIP_TURKS:
610 		io_mc_regs = (u32 *)&turks_io_mc_regs;
611 		ucode_size = BTC_MC_UCODE_SIZE;
612 		regs_size = BTC_IO_MC_REGS_SIZE;
613 		break;
614 	case CHIP_CAICOS:
615 	default:
616 		io_mc_regs = (u32 *)&caicos_io_mc_regs;
617 		ucode_size = BTC_MC_UCODE_SIZE;
618 		regs_size = BTC_IO_MC_REGS_SIZE;
619 		break;
620 	case CHIP_CAYMAN:
621 		io_mc_regs = (u32 *)&cayman_io_mc_regs;
622 		ucode_size = CAYMAN_MC_UCODE_SIZE;
623 		regs_size = BTC_IO_MC_REGS_SIZE;
624 		break;
625 	}
626 
627 	mem_type = (RREG32(MC_SEQ_MISC0) & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT;
628 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
629 
630 	if ((mem_type == MC_SEQ_MISC0_GDDR5_VALUE) && (running == 0)) {
631 		if (running) {
632 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
633 			WREG32(MC_SHARED_BLACKOUT_CNTL, 1);
634 		}
635 
636 		/* reset the engine and set to writable */
637 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
638 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
639 
640 		/* load mc io regs */
641 		for (i = 0; i < regs_size; i++) {
642 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
643 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
644 		}
645 		/* load the MC ucode */
646 		fw_data = (const __be32 *)rdev->mc_fw->data;
647 		for (i = 0; i < ucode_size; i++)
648 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
649 
650 		/* put the engine back into the active state */
651 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
652 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
653 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
654 
655 		/* wait for training to complete */
656 		for (i = 0; i < rdev->usec_timeout; i++) {
657 			if (RREG32(MC_IO_PAD_CNTL_D0) & MEM_FALL_OUT_CMD)
658 				break;
659 			udelay(1);
660 		}
661 
662 		if (running)
663 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
664 	}
665 
666 	return 0;
667 }
668 
669 int ni_init_microcode(struct radeon_device *rdev)
670 {
671 	const char *chip_name;
672 	const char *rlc_chip_name;
673 	size_t pfp_req_size, me_req_size, rlc_req_size, mc_req_size;
674 	size_t smc_req_size = 0;
675 	char fw_name[30];
676 	int err;
677 
678 	DRM_DEBUG("\n");
679 
680 	switch (rdev->family) {
681 	case CHIP_BARTS:
682 		chip_name = "BARTS";
683 		rlc_chip_name = "BTC";
684 		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
685 		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
686 		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
687 		mc_req_size = BTC_MC_UCODE_SIZE * 4;
688 		smc_req_size = ALIGN(BARTS_SMC_UCODE_SIZE, 4);
689 		break;
690 	case CHIP_TURKS:
691 		chip_name = "TURKS";
692 		rlc_chip_name = "BTC";
693 		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
694 		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
695 		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
696 		mc_req_size = BTC_MC_UCODE_SIZE * 4;
697 		smc_req_size = ALIGN(TURKS_SMC_UCODE_SIZE, 4);
698 		break;
699 	case CHIP_CAICOS:
700 		chip_name = "CAICOS";
701 		rlc_chip_name = "BTC";
702 		pfp_req_size = EVERGREEN_PFP_UCODE_SIZE * 4;
703 		me_req_size = EVERGREEN_PM4_UCODE_SIZE * 4;
704 		rlc_req_size = EVERGREEN_RLC_UCODE_SIZE * 4;
705 		mc_req_size = BTC_MC_UCODE_SIZE * 4;
706 		smc_req_size = ALIGN(CAICOS_SMC_UCODE_SIZE, 4);
707 		break;
708 	case CHIP_CAYMAN:
709 		chip_name = "CAYMAN";
710 		rlc_chip_name = "CAYMAN";
711 		pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
712 		me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
713 		rlc_req_size = CAYMAN_RLC_UCODE_SIZE * 4;
714 		mc_req_size = CAYMAN_MC_UCODE_SIZE * 4;
715 		smc_req_size = ALIGN(CAYMAN_SMC_UCODE_SIZE, 4);
716 		break;
717 	case CHIP_ARUBA:
718 		chip_name = "ARUBA";
719 		rlc_chip_name = "ARUBA";
720 		/* pfp/me same size as CAYMAN */
721 		pfp_req_size = CAYMAN_PFP_UCODE_SIZE * 4;
722 		me_req_size = CAYMAN_PM4_UCODE_SIZE * 4;
723 		rlc_req_size = ARUBA_RLC_UCODE_SIZE * 4;
724 		mc_req_size = 0;
725 		break;
726 	default: BUG();
727 	}
728 
729 	DRM_INFO("Loading %s Microcode\n", chip_name);
730 
731 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
732 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
733 	if (err)
734 		goto out;
735 	if (rdev->pfp_fw->datasize != pfp_req_size) {
736 		printk(KERN_ERR
737 		       "ni_pfp: Bogus length %zu in firmware \"%s\"\n",
738 		       rdev->pfp_fw->datasize, fw_name);
739 		err = -EINVAL;
740 		goto out;
741 	}
742 
743 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
744 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
745 	if (err)
746 		goto out;
747 	if (rdev->me_fw->datasize != me_req_size) {
748 		printk(KERN_ERR
749 		       "ni_me: Bogus length %zu in firmware \"%s\"\n",
750 		       rdev->me_fw->datasize, fw_name);
751 		err = -EINVAL;
752 	}
753 
754 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc",
755 		  rlc_chip_name);
756 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
757 	if (err)
758 		goto out;
759 	if (rdev->rlc_fw->datasize != rlc_req_size) {
760 		printk(KERN_ERR
761 		       "ni_rlc: Bogus length %zu in firmware \"%s\"\n",
762 		       rdev->rlc_fw->datasize, fw_name);
763 		err = -EINVAL;
764 	}
765 
766 	/* no MC ucode on TN */
767 	if (!(rdev->flags & RADEON_IS_IGP)) {
768 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
769 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
770 		if (err)
771 			goto out;
772 		if (rdev->mc_fw->datasize != mc_req_size) {
773 			printk(KERN_ERR
774 			       "ni_mc: Bogus length %zu in firmware \"%s\"\n",
775 			       rdev->mc_fw->datasize, fw_name);
776 			err = -EINVAL;
777 		}
778 	}
779 
780 	if ((rdev->family >= CHIP_BARTS) && (rdev->family <= CHIP_CAYMAN)) {
781 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
782 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
783 		if (err) {
784 			printk(KERN_ERR
785 			       "smc: error loading firmware \"%s\"\n",
786 			       fw_name);
787 			release_firmware(rdev->smc_fw);
788 			rdev->smc_fw = NULL;
789 		} else if (rdev->smc_fw->datasize != smc_req_size) {
790 			printk(KERN_ERR
791 			       "ni_smc: Bogus length %zu in firmware \"%s\"\n",
792 			       rdev->smc_fw->datasize, fw_name);
793 			err = -EINVAL;
794 		}
795 	}
796 
797 out:
798 	if (err) {
799 		if (err != -EINVAL)
800 			printk(KERN_ERR
801 			       "ni_cp: Failed to load firmware \"%s\"\n",
802 			       fw_name);
803 		release_firmware(rdev->pfp_fw);
804 		rdev->pfp_fw = NULL;
805 		release_firmware(rdev->me_fw);
806 		rdev->me_fw = NULL;
807 		release_firmware(rdev->rlc_fw);
808 		rdev->rlc_fw = NULL;
809 		release_firmware(rdev->mc_fw);
810 		rdev->mc_fw = NULL;
811 		release_firmware(rdev->smc_fw);
812 		rdev->smc_fw = NULL;
813 	}
814 	return err;
815 }
816 
817 /**
818  * ni_fini_microcode - drop the firmwares image references
819  *
820  * @rdev: radeon_device pointer
821  *
822  * Drop the pfp, me, mc and rlc firmwares image references.
823  * Called at driver shutdown.
824  */
825 void ni_fini_microcode(struct radeon_device *rdev)
826 {
827 	release_firmware(rdev->pfp_fw);
828 	rdev->pfp_fw = NULL;
829 	release_firmware(rdev->me_fw);
830 	rdev->me_fw = NULL;
831 	release_firmware(rdev->rlc_fw);
832 	rdev->rlc_fw = NULL;
833 	release_firmware(rdev->mc_fw);
834 	rdev->mc_fw = NULL;
835 }
836 
837 int tn_get_temp(struct radeon_device *rdev)
838 {
839 	u32 temp = RREG32_SMC(TN_CURRENT_GNB_TEMP) & 0x7ff;
840 	int actual_temp = (temp / 8) - 49;
841 
842 	return actual_temp * 1000;
843 }
844 
845 /*
846  * Core functions
847  */
848 static void cayman_gpu_init(struct radeon_device *rdev)
849 {
850 	u32 gb_addr_config = 0;
851 	u32 mc_shared_chmap, mc_arb_ramcfg;
852 	u32 cgts_tcc_disable;
853 	u32 sx_debug_1;
854 	u32 smx_dc_ctl0;
855 	u32 cgts_sm_ctrl_reg;
856 	u32 hdp_host_path_cntl;
857 	u32 tmp;
858 	u32 disabled_rb_mask;
859 	int i, j;
860 
861 	switch (rdev->family) {
862 	case CHIP_CAYMAN:
863 		rdev->config.cayman.max_shader_engines = 2;
864 		rdev->config.cayman.max_pipes_per_simd = 4;
865 		rdev->config.cayman.max_tile_pipes = 8;
866 		rdev->config.cayman.max_simds_per_se = 12;
867 		rdev->config.cayman.max_backends_per_se = 4;
868 		rdev->config.cayman.max_texture_channel_caches = 8;
869 		rdev->config.cayman.max_gprs = 256;
870 		rdev->config.cayman.max_threads = 256;
871 		rdev->config.cayman.max_gs_threads = 32;
872 		rdev->config.cayman.max_stack_entries = 512;
873 		rdev->config.cayman.sx_num_of_sets = 8;
874 		rdev->config.cayman.sx_max_export_size = 256;
875 		rdev->config.cayman.sx_max_export_pos_size = 64;
876 		rdev->config.cayman.sx_max_export_smx_size = 192;
877 		rdev->config.cayman.max_hw_contexts = 8;
878 		rdev->config.cayman.sq_num_cf_insts = 2;
879 
880 		rdev->config.cayman.sc_prim_fifo_size = 0x100;
881 		rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
882 		rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
883 		gb_addr_config = CAYMAN_GB_ADDR_CONFIG_GOLDEN;
884 		break;
885 	case CHIP_ARUBA:
886 	default:
887 		rdev->config.cayman.max_shader_engines = 1;
888 		rdev->config.cayman.max_pipes_per_simd = 4;
889 		rdev->config.cayman.max_tile_pipes = 2;
890 		if ((rdev->ddev->pci_device == 0x9900) ||
891 		    (rdev->ddev->pci_device == 0x9901) ||
892 		    (rdev->ddev->pci_device == 0x9905) ||
893 		    (rdev->ddev->pci_device == 0x9906) ||
894 		    (rdev->ddev->pci_device == 0x9907) ||
895 		    (rdev->ddev->pci_device == 0x9908) ||
896 		    (rdev->ddev->pci_device == 0x9909) ||
897 		    (rdev->ddev->pci_device == 0x990B) ||
898 		    (rdev->ddev->pci_device == 0x990C) ||
899 		    (rdev->ddev->pci_device == 0x990F) ||
900 		    (rdev->ddev->pci_device == 0x9910) ||
901 		    (rdev->ddev->pci_device == 0x9917) ||
902 		    (rdev->ddev->pci_device == 0x9999) ||
903 		    (rdev->ddev->pci_device == 0x999C)) {
904 			rdev->config.cayman.max_simds_per_se = 6;
905 			rdev->config.cayman.max_backends_per_se = 2;
906 		} else if ((rdev->ddev->pci_device == 0x9903) ||
907 			   (rdev->ddev->pci_device == 0x9904) ||
908 			   (rdev->ddev->pci_device == 0x990A) ||
909 			   (rdev->ddev->pci_device == 0x990D) ||
910 			   (rdev->ddev->pci_device == 0x990E) ||
911 			   (rdev->ddev->pci_device == 0x9913) ||
912 			   (rdev->ddev->pci_device == 0x9918) ||
913 			   (rdev->ddev->pci_device == 0x999D)) {
914 			rdev->config.cayman.max_simds_per_se = 4;
915 			rdev->config.cayman.max_backends_per_se = 2;
916 		} else if ((rdev->ddev->pci_device == 0x9919) ||
917 			   (rdev->ddev->pci_device == 0x9990) ||
918 			   (rdev->ddev->pci_device == 0x9991) ||
919 			   (rdev->ddev->pci_device == 0x9994) ||
920 			   (rdev->ddev->pci_device == 0x9995) ||
921 			   (rdev->ddev->pci_device == 0x9996) ||
922 			   (rdev->ddev->pci_device == 0x999A) ||
923 			   (rdev->ddev->pci_device == 0x99A0)) {
924 			rdev->config.cayman.max_simds_per_se = 3;
925 			rdev->config.cayman.max_backends_per_se = 1;
926 		} else {
927 			rdev->config.cayman.max_simds_per_se = 2;
928 			rdev->config.cayman.max_backends_per_se = 1;
929 		}
930 		rdev->config.cayman.max_texture_channel_caches = 2;
931 		rdev->config.cayman.max_gprs = 256;
932 		rdev->config.cayman.max_threads = 256;
933 		rdev->config.cayman.max_gs_threads = 32;
934 		rdev->config.cayman.max_stack_entries = 512;
935 		rdev->config.cayman.sx_num_of_sets = 8;
936 		rdev->config.cayman.sx_max_export_size = 256;
937 		rdev->config.cayman.sx_max_export_pos_size = 64;
938 		rdev->config.cayman.sx_max_export_smx_size = 192;
939 		rdev->config.cayman.max_hw_contexts = 8;
940 		rdev->config.cayman.sq_num_cf_insts = 2;
941 
942 		rdev->config.cayman.sc_prim_fifo_size = 0x40;
943 		rdev->config.cayman.sc_hiz_tile_fifo_size = 0x30;
944 		rdev->config.cayman.sc_earlyz_tile_fifo_size = 0x130;
945 		gb_addr_config = ARUBA_GB_ADDR_CONFIG_GOLDEN;
946 		break;
947 	}
948 
949 	/* Initialize HDP */
950 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
951 		WREG32((0x2c14 + j), 0x00000000);
952 		WREG32((0x2c18 + j), 0x00000000);
953 		WREG32((0x2c1c + j), 0x00000000);
954 		WREG32((0x2c20 + j), 0x00000000);
955 		WREG32((0x2c24 + j), 0x00000000);
956 	}
957 
958 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
959 
960 	evergreen_fix_pci_max_read_req_size(rdev);
961 
962 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
963 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
964 
965 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
966 	rdev->config.cayman.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
967 	if (rdev->config.cayman.mem_row_size_in_kb > 4)
968 		rdev->config.cayman.mem_row_size_in_kb = 4;
969 	/* XXX use MC settings? */
970 	rdev->config.cayman.shader_engine_tile_size = 32;
971 	rdev->config.cayman.num_gpus = 1;
972 	rdev->config.cayman.multi_gpu_tile_size = 64;
973 
974 	tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT;
975 	rdev->config.cayman.num_tile_pipes = (1 << tmp);
976 	tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
977 	rdev->config.cayman.mem_max_burst_length_bytes = (tmp + 1) * 256;
978 	tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT;
979 	rdev->config.cayman.num_shader_engines = tmp + 1;
980 	tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT;
981 	rdev->config.cayman.num_gpus = tmp + 1;
982 	tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT;
983 	rdev->config.cayman.multi_gpu_tile_size = 1 << tmp;
984 	tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT;
985 	rdev->config.cayman.mem_row_size_in_kb = 1 << tmp;
986 
987 
988 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
989 	 * not have bank info, so create a custom tiling dword.
990 	 * bits 3:0   num_pipes
991 	 * bits 7:4   num_banks
992 	 * bits 11:8  group_size
993 	 * bits 15:12 row_size
994 	 */
995 	rdev->config.cayman.tile_config = 0;
996 	switch (rdev->config.cayman.num_tile_pipes) {
997 	case 1:
998 	default:
999 		rdev->config.cayman.tile_config |= (0 << 0);
1000 		break;
1001 	case 2:
1002 		rdev->config.cayman.tile_config |= (1 << 0);
1003 		break;
1004 	case 4:
1005 		rdev->config.cayman.tile_config |= (2 << 0);
1006 		break;
1007 	case 8:
1008 		rdev->config.cayman.tile_config |= (3 << 0);
1009 		break;
1010 	}
1011 
1012 	/* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */
1013 	if (rdev->flags & RADEON_IS_IGP)
1014 		rdev->config.cayman.tile_config |= 1 << 4;
1015 	else {
1016 		switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
1017 		case 0: /* four banks */
1018 			rdev->config.cayman.tile_config |= 0 << 4;
1019 			break;
1020 		case 1: /* eight banks */
1021 			rdev->config.cayman.tile_config |= 1 << 4;
1022 			break;
1023 		case 2: /* sixteen banks */
1024 		default:
1025 			rdev->config.cayman.tile_config |= 2 << 4;
1026 			break;
1027 		}
1028 	}
1029 	rdev->config.cayman.tile_config |=
1030 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1031 	rdev->config.cayman.tile_config |=
1032 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1033 
1034 	tmp = 0;
1035 	for (i = (rdev->config.cayman.max_shader_engines - 1); i >= 0; i--) {
1036 		u32 rb_disable_bitmap;
1037 
1038 		WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
1039 		WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_INDEX(i));
1040 		rb_disable_bitmap = (RREG32(CC_RB_BACKEND_DISABLE) & 0x00ff0000) >> 16;
1041 		tmp <<= 4;
1042 		tmp |= rb_disable_bitmap;
1043 	}
1044 	/* enabled rb are just the one not disabled :) */
1045 	disabled_rb_mask = tmp;
1046 	tmp = 0;
1047 	for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
1048 		tmp |= (1 << i);
1049 	/* if all the backends are disabled, fix it up here */
1050 	if ((disabled_rb_mask & tmp) == tmp) {
1051 		for (i = 0; i < (rdev->config.cayman.max_backends_per_se * rdev->config.cayman.max_shader_engines); i++)
1052 			disabled_rb_mask &= ~(1 << i);
1053 	}
1054 
1055 	WREG32(GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
1056 	WREG32(RLC_GFX_INDEX, INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES);
1057 
1058 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
1059 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1060 	if (ASIC_IS_DCE6(rdev))
1061 		WREG32(DMIF_ADDR_CALC, gb_addr_config);
1062 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1063 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
1064 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
1065 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1066 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1067 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1068 
1069 	if ((rdev->config.cayman.max_backends_per_se == 1) &&
1070 	    (rdev->flags & RADEON_IS_IGP)) {
1071 		if ((disabled_rb_mask & 3) == 1) {
1072 			/* RB0 disabled, RB1 enabled */
1073 			tmp = 0x11111111;
1074 		} else {
1075 			/* RB1 disabled, RB0 enabled */
1076 			tmp = 0x00000000;
1077 		}
1078 	} else {
1079 		tmp = gb_addr_config & NUM_PIPES_MASK;
1080 		tmp = r6xx_remap_render_backend(rdev, tmp,
1081 						rdev->config.cayman.max_backends_per_se *
1082 						rdev->config.cayman.max_shader_engines,
1083 						CAYMAN_MAX_BACKENDS, disabled_rb_mask);
1084 	}
1085 	WREG32(GB_BACKEND_MAP, tmp);
1086 
1087 	cgts_tcc_disable = 0xffff0000;
1088 	for (i = 0; i < rdev->config.cayman.max_texture_channel_caches; i++)
1089 		cgts_tcc_disable &= ~(1 << (16 + i));
1090 	WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable);
1091 	WREG32(CGTS_SYS_TCC_DISABLE, cgts_tcc_disable);
1092 	WREG32(CGTS_USER_SYS_TCC_DISABLE, cgts_tcc_disable);
1093 	WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable);
1094 
1095 	/* reprogram the shader complex */
1096 	cgts_sm_ctrl_reg = RREG32(CGTS_SM_CTRL_REG);
1097 	for (i = 0; i < 16; i++)
1098 		WREG32(CGTS_SM_CTRL_REG, OVERRIDE);
1099 	WREG32(CGTS_SM_CTRL_REG, cgts_sm_ctrl_reg);
1100 
1101 	/* set HW defaults for 3D engine */
1102 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1103 
1104 	sx_debug_1 = RREG32(SX_DEBUG_1);
1105 	sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS;
1106 	WREG32(SX_DEBUG_1, sx_debug_1);
1107 
1108 	smx_dc_ctl0 = RREG32(SMX_DC_CTL0);
1109 	smx_dc_ctl0 &= ~NUMBER_OF_SETS(0x1ff);
1110 	smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.cayman.sx_num_of_sets);
1111 	WREG32(SMX_DC_CTL0, smx_dc_ctl0);
1112 
1113 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4) | CRC_SIMD_ID_WADDR_DISABLE);
1114 
1115 	/* need to be explicitly zero-ed */
1116 	WREG32(VGT_OFFCHIP_LDS_BASE, 0);
1117 	WREG32(SQ_LSTMP_RING_BASE, 0);
1118 	WREG32(SQ_HSTMP_RING_BASE, 0);
1119 	WREG32(SQ_ESTMP_RING_BASE, 0);
1120 	WREG32(SQ_GSTMP_RING_BASE, 0);
1121 	WREG32(SQ_VSTMP_RING_BASE, 0);
1122 	WREG32(SQ_PSTMP_RING_BASE, 0);
1123 
1124 	WREG32(TA_CNTL_AUX, DISABLE_CUBE_ANISO);
1125 
1126 	WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.cayman.sx_max_export_size / 4) - 1) |
1127 					POSITION_BUFFER_SIZE((rdev->config.cayman.sx_max_export_pos_size / 4) - 1) |
1128 					SMX_BUFFER_SIZE((rdev->config.cayman.sx_max_export_smx_size / 4) - 1)));
1129 
1130 	WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.cayman.sc_prim_fifo_size) |
1131 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_hiz_tile_fifo_size) |
1132 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cayman.sc_earlyz_tile_fifo_size)));
1133 
1134 
1135 	WREG32(VGT_NUM_INSTANCES, 1);
1136 
1137 	WREG32(CP_PERFMON_CNTL, 0);
1138 
1139 	WREG32(SQ_MS_FIFO_SIZES, (CACHE_FIFO_SIZE(16 * rdev->config.cayman.sq_num_cf_insts) |
1140 				  FETCH_FIFO_HIWATER(0x4) |
1141 				  DONE_FIFO_HIWATER(0xe0) |
1142 				  ALU_UPDATE_FIFO_HIWATER(0x8)));
1143 
1144 	WREG32(SQ_GPR_RESOURCE_MGMT_1, NUM_CLAUSE_TEMP_GPRS(4));
1145 	WREG32(SQ_CONFIG, (VC_ENABLE |
1146 			   EXPORT_SRC_C |
1147 			   GFX_PRIO(0) |
1148 			   CS1_PRIO(0) |
1149 			   CS2_PRIO(1)));
1150 	WREG32(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, DYN_GPR_ENABLE);
1151 
1152 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
1153 					  FORCE_EOV_MAX_REZ_CNT(255)));
1154 
1155 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
1156 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
1157 
1158 	WREG32(VGT_GS_VERTEX_REUSE, 16);
1159 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
1160 
1161 	WREG32(CB_PERF_CTR0_SEL_0, 0);
1162 	WREG32(CB_PERF_CTR0_SEL_1, 0);
1163 	WREG32(CB_PERF_CTR1_SEL_0, 0);
1164 	WREG32(CB_PERF_CTR1_SEL_1, 0);
1165 	WREG32(CB_PERF_CTR2_SEL_0, 0);
1166 	WREG32(CB_PERF_CTR2_SEL_1, 0);
1167 	WREG32(CB_PERF_CTR3_SEL_0, 0);
1168 	WREG32(CB_PERF_CTR3_SEL_1, 0);
1169 
1170 	tmp = RREG32(HDP_MISC_CNTL);
1171 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
1172 	WREG32(HDP_MISC_CNTL, tmp);
1173 
1174 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
1175 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
1176 
1177 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
1178 
1179 	udelay(50);
1180 
1181 	/* set clockgating golden values on TN */
1182 	if (rdev->family == CHIP_ARUBA) {
1183 		tmp = RREG32_CG(CG_CGTT_LOCAL_0);
1184 		tmp &= ~0x00380000;
1185 		WREG32_CG(CG_CGTT_LOCAL_0, tmp);
1186                 tmp = RREG32_CG(CG_CGTT_LOCAL_1);
1187 		tmp &= ~0x0e000000;
1188 		WREG32_CG(CG_CGTT_LOCAL_1, tmp);
1189 	}
1190 }
1191 
1192 /*
1193  * GART
1194  */
/**
 * cayman_pcie_gart_tlb_flush - flush the GART TLBs
 *
 * @rdev: radeon_device pointer
 *
 * Flush the HDP read cache and then request invalidation of the
 * page-table TLBs for all eight VM contexts.
 */
void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);

	/* bits 0-7 are the VM contexts0-7 */
	WREG32(VM_INVALIDATE_REQUEST, 1);
}
1203 
/**
 * cayman_pcie_gart_enable - set up and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pins the GART page table in VRAM, programs the L1 TLB / L2 cache
 * controls, points VM context 0 at the GTT aperture, and points
 * contexts 1-7 at the same table as a placeholder (the per-process
 * page tables are installed later by the VM code in radeon_gart.c).
 * Returns 0 on success, negative error code on failure.
 */
static int cayman_pcie_gart_enable(struct radeon_device *rdev)
{
	int i, r;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |	/* NOTE(review): undocumented field here - presumably TLB sizing; confirm against register spec */
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0 */
	/* context 0 covers the GTT aperture; addresses are in 4K pages (>> 12) */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context 0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* NOTE(review): raw register offsets, zeroed here without named
	 * #defines - presumably VM fault-related; confirm against nid.h */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-7 */
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 8; i++) {
		/* context register banks are 4 bytes apart, hence (i << 2) */
		WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0);
		WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), rdev->vm_manager.max_pfn);
		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			rdev->gart.table_addr >> 12);
	}

	/* enable context1-7 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	/* two-level page tables plus interrupt+default handling for every
	 * class of protection fault */
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	cayman_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
1284 
/**
 * cayman_pcie_gart_disable - disable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disable all VM contexts, drop the L1 TLB / L2 cache enables
 * (mirroring the setup in cayman_pcie_gart_enable()), and unpin
 * the page table from VRAM.
 */
static void cayman_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
1304 
/**
 * cayman_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disable the GART, free the page-table backing object, and
 * release the common GART state.
 */
static void cayman_pcie_gart_fini(struct radeon_device *rdev)
{
	cayman_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
1311 
1312 void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
1313 			      int ring, u32 cp_int_cntl)
1314 {
1315 	u32 srbm_gfx_cntl = RREG32(SRBM_GFX_CNTL) & ~3;
1316 
1317 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl | (ring & 3));
1318 	WREG32(CP_INT_CNTL, cp_int_cntl);
1319 }
1320 
1321 /*
1322  * CP.
1323  */
/**
 * cayman_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Emits a surface-sync to flush the TC/SH caches over GART, followed by
 * an EVENT_WRITE_EOP that writes the fence sequence number to the fence
 * address and raises an interrupt.  The packet layout must stay exactly
 * as written - the ring is consumed by the CP firmware.
 */
void cayman_fence_ring_emit(struct radeon_device *rdev,
			    struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart for this vmid */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);	/* vmid 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);	/* full surface size */
	radeon_ring_write(ring, 0);		/* base address */
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_EVENT_TS) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff);
	/* DATA_SEL(1): write 32-bit fence seq; INT_SEL(2): interrupt on write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
1347 
/**
 * cayman_ring_ib_execute - schedule an IB on the gfx/compute CP rings
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to schedule
 *
 * Switches the CP to DX10/11 mode, optionally records the predicted
 * read pointer to a scratch register, emits the INDIRECT_BUFFER
 * packet that executes the IB in the IB's VM, and finally flushes
 * the read caches for that vmid.
 */
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];

	/* set to DX10/11 mode */
	radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
	radeon_ring_write(ring, 1);

	if (ring->rptr_save_reg) {
		/* 3 + 4 + 8 = dwords emitted below before the CP consumes
		 * the IB packet, so this predicts rptr after this submission */
		uint32_t next_rptr = ring->wptr + 3 + 4 + 8;
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, ((ring->rptr_save_reg -
					  PACKET3_SET_CONFIG_REG_START) >> 2));
		radeon_ring_write(ring, next_rptr);
	}

	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* request 32-bit byte swapping */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
	/* length in dwords, plus the vmid in bits 31:24 */
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	/* flush read cache over gart for this vmid */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF);	/* full surface size */
	radeon_ring_write(ring, 0);		/* base address */
	radeon_ring_write(ring, 10); /* poll interval */
}
1384 
/**
 * cayman_uvd_semaphore_emit - emit a semaphore command on the UVD ring
 *
 * @rdev: radeon_device pointer
 * @ring: UVD ring
 * @semaphore: semaphore object
 * @emit_wait: true to emit a wait command, false to emit a signal
 *
 * Programs the semaphore address (split as two 20-bit halves of the
 * 8-byte-aligned address) and then issues the semaphore command.
 */
void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
			       struct radeon_ring *ring,
			       struct radeon_semaphore *semaphore,
			       bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;

	/* low 20 bits of the address in 8-byte units */
	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
	radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);

	/* next 20 bits */
	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
	radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);

	/* bit 7 = command valid, bit 0 = wait (1) vs signal (0) */
	radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
	radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
}
1401 
1402 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
1403 {
1404 	if (enable)
1405 		WREG32(CP_ME_CNTL, 0);
1406 	else {
1407 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1408 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
1409 		WREG32(SCRATCH_UMSK, 0);
1410 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1411 	}
1412 }
1413 
/**
 * cayman_cp_load_microcode - load the PFP and ME microcode into the CP
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP, then streams the big-endian firmware images into the
 * PFP ucode RAM and the ME RAM word by word, resetting the address
 * registers afterwards.  Returns 0 on success, -EINVAL if either
 * firmware image has not been loaded.
 */
static int cayman_cp_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw)
		return -EINVAL;

	/* CP must be halted while its ucode RAM is rewritten */
	cayman_cp_enable(rdev, false);

	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CAYMAN_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CAYMAN_PM4_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));

	/* reset the read/write addresses so execution starts at word 0 */
	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
1440 
/**
 * cayman_cp_start - initialize the CP and emit the clear state
 *
 * @rdev: radeon_device pointer
 *
 * Sends the ME_INITIALIZE packet on the gfx ring, enables the CP,
 * then emits the golden register state (cayman_default_state) wrapped
 * in PREAMBLE/CLEAR_STATE packets plus a few fixups.  Only cp0 (the
 * gfx ring) is initialized here.  Returns 0 on success or the ring
 * lock error code.
 */
static int cayman_cp_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	r = radeon_ring_lock(rdev, ring, 7);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
	radeon_ring_write(ring, 0x1);
	radeon_ring_write(ring, 0x0);
	radeon_ring_write(ring, rdev->config.cayman.max_hw_contexts - 1);
	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
	radeon_ring_unlock_commit(rdev, ring);

	cayman_cp_enable(rdev, true);

	/* +19 covers the fixed packets emitted around the default state */
	r = radeon_ring_lock(rdev, ring, cayman_default_size + 19);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	for (i = 0; i < cayman_default_size; i++)
		radeon_ring_write(ring, cayman_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	/* SQ_VTX_BASE_VTX_LOC */
	radeon_ring_write(ring, 0xc0026f00);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);
	radeon_ring_write(ring, 0x00000000);

	/* Clear consts */
	radeon_ring_write(ring, 0xc0036f00);
	radeon_ring_write(ring, 0x00000bc4);
	radeon_ring_write(ring, 0xffffffff);
	radeon_ring_write(ring, 0xffffffff);
	radeon_ring_write(ring, 0xffffffff);

	radeon_ring_write(ring, 0xc0026900);
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /*  */
	radeon_ring_unlock_commit(rdev, ring);

	/* XXX init other rings */

	return 0;
}
1506 
/**
 * cayman_cp_fini - tear down the CP
 *
 * @rdev: radeon_device pointer
 *
 * Halts the CP, frees the gfx ring, and releases the scratch
 * register used to save the read pointer.
 */
static void cayman_cp_fini(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	cayman_cp_enable(rdev, false);
	radeon_ring_fini(rdev, ring);
	radeon_scratch_free(rdev, ring->rptr_save_reg);
}
1514 
/**
 * cayman_cp_resume - soft-reset and bring up the three CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Resets the CP (and the blocks that must be reset alongside it),
 * programs the ring-buffer control/rptr-writeback/base registers for
 * the gfx ring and the two compute rings, zeroes the read/write
 * pointers, and starts the CP via cayman_cp_start().  Only cp0 is
 * ring-tested; cp1/cp2 stay marked not ready.  Returns 0 on success
 * or the ring-test error code.
 */
static int cayman_cp_resume(struct radeon_device *rdev)
{
	/* per-ring register sets, indexed in lockstep with ridx[] */
	static const int ridx[] = {
		RADEON_RING_TYPE_GFX_INDEX,
		CAYMAN_RING_TYPE_CP1_INDEX,
		CAYMAN_RING_TYPE_CP2_INDEX
	};
	static const unsigned cp_rb_cntl[] = {
		CP_RB0_CNTL,
		CP_RB1_CNTL,
		CP_RB2_CNTL,
	};
	static const unsigned cp_rb_rptr_addr[] = {
		CP_RB0_RPTR_ADDR,
		CP_RB1_RPTR_ADDR,
		CP_RB2_RPTR_ADDR
	};
	static const unsigned cp_rb_rptr_addr_hi[] = {
		CP_RB0_RPTR_ADDR_HI,
		CP_RB1_RPTR_ADDR_HI,
		CP_RB2_RPTR_ADDR_HI
	};
	static const unsigned cp_rb_base[] = {
		CP_RB0_BASE,
		CP_RB1_BASE,
		CP_RB2_BASE
	};
	struct radeon_ring *ring;
	int i, r;

	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
				 SOFT_RESET_PA |
				 SOFT_RESET_SH |
				 SOFT_RESET_VGT |
				 SOFT_RESET_SPI |
				 SOFT_RESET_SX));
	RREG32(GRBM_SOFT_RESET);	/* read back to flush the write */
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* NOTE(review): magic CP_DEBUG bit carried over from the upstream
	 * driver; meaning not documented here */
	WREG32(CP_DEBUG, (1 << 27));

	/* set the wb address whether it's enabled or not */
	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
	WREG32(SCRATCH_UMSK, 0xff);

	for (i = 0; i < 3; ++i) {
		uint32_t rb_cntl;
		uint64_t addr;

		/* Set ring buffer size */
		ring = &rdev->ring[ridx[i]];
		rb_cntl = drm_order(ring->ring_size / 8);	/* size in log2 qwords */
		rb_cntl |= drm_order(RADEON_GPU_PAGE_SIZE/8) << 8;
#ifdef __BIG_ENDIAN
		rb_cntl |= BUF_SWAP_32BIT;
#endif
		WREG32(cp_rb_cntl[i], rb_cntl);

		/* set the wb address whether it's enabled or not */
		addr = rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET;
		WREG32(cp_rb_rptr_addr[i], addr & 0xFFFFFFFC);
		WREG32(cp_rb_rptr_addr_hi[i], upper_32_bits(addr) & 0xFF);
	}

	/* set the rb base addr, this causes an internal reset of ALL rings */
	for (i = 0; i < 3; ++i) {
		ring = &rdev->ring[ridx[i]];
		WREG32(cp_rb_base[i], ring->gpu_addr >> 8);
	}

	for (i = 0; i < 3; ++i) {
		/* Initialize the ring buffer's read and write pointers */
		ring = &rdev->ring[ridx[i]];
		/* allow the driver to write rptr while we zero it */
		WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA);

		ring->rptr = ring->wptr = 0;
		WREG32(ring->rptr_reg, ring->rptr);
		WREG32(ring->wptr_reg, ring->wptr);

		mdelay(1);
		WREG32_P(cp_rb_cntl[i], 0, ~RB_RPTR_WR_ENA);
	}

	/* start the rings */
	cayman_cp_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	/* this only test cp0 */
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
		return r;
	}

	return 0;
}
1623 
1624 /*
1625  * DMA
1626  * Starting with R600, the GPU has an asynchronous
1627  * DMA engine.  The programming model is very similar
1628  * to the 3D engine (ring buffer, IBs, etc.), but the
1629  * DMA controller has it's own packet format that is
1630  * different form the PM4 format used by the 3D engine.
1631  * It supports copying data, writing embedded data,
1632  * solid fills, and a number of other things.  It also
1633  * has support for tiling/detiling of buffers.
1634  * Cayman and newer support two asynchronous DMA engines.
1635  */
1636 /**
1637  * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
1638  *
1639  * @rdev: radeon_device pointer
1640  * @ib: IB object to schedule
1641  *
1642  * Schedule an IB in the DMA ring (cayman-SI).
1643  */
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
				struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];

	if (rdev->wb.enabled) {
		/* predict the rptr after this submission: the IB packet
		 * below must land on (wptr & 7) == 5, so round up to that
		 * alignment before adding the 3-dword IB packet itself */
		u32 next_rptr = ring->wptr + 4;
		while ((next_rptr & 7) != 5)
			next_rptr++;
		next_rptr += 3;
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
		radeon_ring_write(ring, next_rptr);
	}

	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
	 * Pad as necessary with NOPs.
	 */
	while ((ring->wptr & 7) != 5)
		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
	/* IB base must be 32-byte aligned */
	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));

}
1670 
1671 /**
1672  * cayman_dma_stop - stop the async dma engines
1673  *
1674  * @rdev: radeon_device pointer
1675  *
1676  * Stop the async dma engines (cayman-SI).
1677  */
1678 void cayman_dma_stop(struct radeon_device *rdev)
1679 {
1680 	u32 rb_cntl;
1681 
1682 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1683 
1684 	/* dma0 */
1685 	rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
1686 	rb_cntl &= ~DMA_RB_ENABLE;
1687 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
1688 
1689 	/* dma1 */
1690 	rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
1691 	rb_cntl &= ~DMA_RB_ENABLE;
1692 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
1693 
1694 	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
1695 	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
1696 }
1697 
1698 /**
1699  * cayman_dma_resume - setup and start the async dma engines
1700  *
1701  * @rdev: radeon_device pointer
1702  *
1703  * Set up the DMA ring buffers and enable them. (cayman-SI).
1704  * Returns 0 for success, error for failure.
1705  */
int cayman_dma_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, dma_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	/* Reset dma */
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
	RREG32(SRBM_SOFT_RESET);	/* read back to flush the write */
	udelay(50);
	WREG32(SRBM_SOFT_RESET, 0);

	/* bring up both engines: i == 0 is dma0, i == 1 is dma1 */
	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = DMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = DMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = drm_order(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(DMA_RB_RPTR + reg_offset, 0);
		WREG32(DMA_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;

		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);

		/* enable DMA IBs */
		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
#ifdef __BIG_ENDIAN
		ib_cntl |= DMA_IB_SWAP_ENABLE;
#endif
		WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);

		/* don't interrupt just because the context went empty */
		dma_cntl = RREG32(DMA_CNTL + reg_offset);
		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
		WREG32(DMA_CNTL + reg_offset, dma_cntl);

		ring->wptr = 0;
		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

		ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;

		/* finally flip the enable bit (with writeback if selected) */
		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
1788 
1789 /**
1790  * cayman_dma_fini - tear down the async dma engines
1791  *
1792  * @rdev: radeon_device pointer
1793  *
1794  * Stop the async dma engines and free the rings (cayman-SI).
1795  */
void cayman_dma_fini(struct radeon_device *rdev)
{
	/* halt both engines, then release their ring buffers */
	cayman_dma_stop(rdev);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
}
1802 
/**
 * cayman_gpu_check_soft_reset - determine which GPU blocks are hung
 *
 * @rdev: radeon_device pointer
 *
 * Reads the various status registers (GRBM, DMA, SRBM, VM L2) and
 * accumulates RADEON_RESET_* flags for every block that reports busy.
 * An MC busy indication is deliberately cleared from the mask since
 * the MC is most likely just busy, not hung.  Returns the mask of
 * blocks that need a soft reset (0 means the GPU looks idle).
 */
static u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   SH_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
1880 
/**
 * cayman_gpu_soft_reset - soft reset the GPU blocks selected by reset_mask
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of RADEON_RESET_* bits naming the blocks to reset
 *
 * Halts the CP and the requested DMA engines, stops the memory controller,
 * then pulses the matching GRBM/SRBM soft-reset bits before restoring the
 * MC state (cayman/TN).  The statement order here mirrors the required
 * hardware sequence; do not reorder.
 */
static void cayman_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump status and the VM protection fault registers for debugging */
	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14F8));
	dev_info(rdev->dev, "  VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14D8));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(0x14FC));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(0x14DC));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}

	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	udelay(50);

	/* quiesce the MC before yanking blocks into reset */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the generic reset mask into GRBM (gfx) reset bits ... */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_SPI |
			SOFT_RESET_SH |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	/* ... and into SRBM (system) reset bits */
	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		srbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* IGPs share the MC with system memory; never soft-reset it there */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET); /* read back to post the write */

		udelay(50);

		/* release the reset bits again */
		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET); /* read back to post the write */

		udelay(50);

		/* release the reset bits again */
		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
2012 
2013 int cayman_asic_reset(struct radeon_device *rdev)
2014 {
2015 	u32 reset_mask;
2016 
2017 	reset_mask = cayman_gpu_check_soft_reset(rdev);
2018 
2019 	if (reset_mask)
2020 		r600_set_bios_scratch_engine_hung(rdev, true);
2021 
2022 	cayman_gpu_soft_reset(rdev, reset_mask);
2023 
2024 	reset_mask = cayman_gpu_check_soft_reset(rdev);
2025 
2026 	if (!reset_mask)
2027 		r600_set_bios_scratch_engine_hung(rdev, false);
2028 
2029 	return 0;
2030 }
2031 
2032 /**
2033  * cayman_gfx_is_lockup - Check if the GFX engine is locked up
2034  *
2035  * @rdev: radeon_device pointer
2036  * @ring: radeon_ring structure holding ring information
2037  *
2038  * Check if the GFX engine is locked up.
2039  * Returns true if the engine appears to be locked up, false if not.
2040  */
2041 bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2042 {
2043 	u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
2044 
2045 	if (!(reset_mask & (RADEON_RESET_GFX |
2046 			    RADEON_RESET_COMPUTE |
2047 			    RADEON_RESET_CP))) {
2048 		radeon_ring_lockup_update(ring);
2049 		return false;
2050 	}
2051 	/* force CP activities */
2052 	radeon_ring_force_activity(rdev, ring);
2053 	return radeon_ring_test_lockup(rdev, ring);
2054 }
2055 
2056 /**
2057  * cayman_dma_is_lockup - Check if the DMA engine is locked up
2058  *
2059  * @rdev: radeon_device pointer
2060  * @ring: radeon_ring structure holding ring information
2061  *
2062  * Check if the async DMA engine is locked up.
2063  * Returns true if the engine appears to be locked up, false if not.
2064  */
2065 bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
2066 {
2067 	u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
2068 	u32 mask;
2069 
2070 	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
2071 		mask = RADEON_RESET_DMA;
2072 	else
2073 		mask = RADEON_RESET_DMA1;
2074 
2075 	if (!(reset_mask & mask)) {
2076 		radeon_ring_lockup_update(ring);
2077 		return false;
2078 	}
2079 	/* force ring activities */
2080 	radeon_ring_force_activity(rdev, ring);
2081 	return radeon_ring_test_lockup(rdev, ring);
2082 }
2083 
/**
 * cayman_startup - bring the hardware up and start all rings
 *
 * @rdev: radeon_device pointer
 *
 * Loads microcode, programs the MC, enables the GART, initializes the
 * GPU core, allocates write-back/fence/IRQ infrastructure and starts
 * the CP, DMA and (if available) UVD rings.  The ordering is the
 * required hardware bring-up sequence.
 * Returns 0 on success, negative error code on failure.
 */
static int cayman_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* enable pcie gen2 link */
	evergreen_pcie_gen2_enable(rdev);
	/* enable aspm */
	evergreen_program_aspm(rdev);

	evergreen_mc_program(rdev);

	/* load ucode if not already done; IGPs (TN) have no MC ucode */
	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
			r = ni_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ni_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	r = cayman_pcie_gart_enable(rdev);
	if (r)
		return r;
	cayman_gpu_init(rdev);

	/* blitter failure is non-fatal: copies fall back to memcpy */
	r = evergreen_blit_init(rdev);
	if (r) {
		r600_blit_fini(rdev);
		rdev->asic->copy.copy = NULL;
		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
	}

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		rdev->rlc.reg_list = tn_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size = tn_rlc_save_restore_register_list_size;
		rdev->rlc.cs_data = cayman_cs_data;
		r = sumo_rlc_init(rdev);
		if (r) {
			DRM_ERROR("Failed to init rlc BOs!\n");
			return r;
		}
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	/* UVD failure is non-fatal: just disable its ring below */
	r = rv770_uvd_resume(rdev);
	if (!r) {
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = r600_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	evergreen_irq_set(rdev);

	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
	if (r)
		return r;

	r = cayman_cp_load_microcode(rdev);
	if (r)
		return r;
	r = cayman_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means UVD resume failed above; skip it */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size,
				     R600_WB_UVD_RPTR_OFFSET,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     0, 0xfffff, RADEON_CP_PACKET2);
		if (!r)
			r = r600_uvd_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = r600_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
2271 
2272 int cayman_resume(struct radeon_device *rdev)
2273 {
2274 	int r;
2275 
2276 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
2277 	 * posting will perform necessary task to bring back GPU into good
2278 	 * shape.
2279 	 */
2280 	/* post card */
2281 	atom_asic_init(rdev->mode_info.atom_context);
2282 
2283 	/* init golden registers */
2284 	ni_init_golden_registers(rdev);
2285 
2286 	rdev->accel_working = true;
2287 	r = cayman_startup(rdev);
2288 	if (r) {
2289 		DRM_ERROR("cayman startup failed on resume\n");
2290 		rdev->accel_working = false;
2291 		return r;
2292 	}
2293 	return r;
2294 }
2295 
/**
 * cayman_suspend - quiesce the asic for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Stops audio, the VM manager, the CP/DMA/UVD engines, interrupts,
 * write-back and finally the GART.  The teardown order is the reverse
 * of the startup sequence; do not reorder.  Always returns 0.
 */
int cayman_suspend(struct radeon_device *rdev)
{
	r600_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cayman_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	r600_uvd_stop(rdev);
	radeon_uvd_suspend(rdev);
	evergreen_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cayman_pcie_gart_disable(rdev);
	return 0;
}
2309 
/* Plan is to move initialization in that function and use
 * helper function so that radeon_device_init pretty much
 * does nothing more than calling asic specific function. This
 * should also allow to remove a bunch of callback functions
 * like vram_info.
 */
/**
 * cayman_init - one-time asic initialization
 *
 * @rdev: radeon_device pointer
 *
 * Reads/validates the BIOS, posts the card if needed, sets up clocks,
 * fences, the memory controller, the rings and the GART, then attempts
 * the first hardware startup.  A startup failure only disables
 * acceleration; a missing MC ucode on dGPUs is fatal.
 * Returns 0 on success, negative error code on failure.
 */
int cayman_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	ni_init_golden_registers(rdev);
	/* Initialize scratch registers */
	r600_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);
	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;
	/* initialize memory controller */
	r = evergreen_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	/* set up the ring buffers; they are allocated later in startup */
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 64 * 1024);

	/* UVD is optional; only set up its ring if init succeeds */
	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cayman_startup(rdev);
	if (r) {
		/* non-fatal: keep the device alive for modesetting only */
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cayman_cp_fini(rdev);
		cayman_dma_fini(rdev);
		r600_irq_fini(rdev);
		if (rdev->flags & RADEON_IS_IGP)
			sumo_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cayman_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 *
	 * We can skip this check for TN, because there is no MC
	 * ucode.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
2421 
/**
 * cayman_fini - final asic teardown
 *
 * @rdev: radeon_device pointer
 *
 * Tears down engines, memory management and BIOS state in the reverse
 * order of cayman_init()/cayman_startup().  Do not reorder these calls.
 */
void cayman_fini(struct radeon_device *rdev)
{
	r600_blit_fini(rdev);
	cayman_cp_fini(rdev);
	cayman_dma_fini(rdev);
	r600_irq_fini(rdev);
	if (rdev->flags & RADEON_IS_IGP)
		sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	r600_uvd_stop(rdev);
	radeon_uvd_fini(rdev);
	cayman_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	ni_fini_microcode(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
2446 
2447 /*
2448  * vm
2449  */
2450 int cayman_vm_init(struct radeon_device *rdev)
2451 {
2452 	/* number of VMs */
2453 	rdev->vm_manager.nvm = 8;
2454 	/* base offset of vram pages */
2455 	if (rdev->flags & RADEON_IS_IGP) {
2456 		u64 tmp = RREG32(FUS_MC_VM_FB_OFFSET);
2457 		tmp <<= 22;
2458 		rdev->vm_manager.vram_base_offset = tmp;
2459 	} else
2460 		rdev->vm_manager.vram_base_offset = 0;
2461 	return 0;
2462 }
2463 
/**
 * cayman_vm_fini - tear down the VM manager
 *
 * @rdev: radeon_device pointer
 *
 * Nothing to do here; cayman_vm_init() allocates no resources.
 */
void cayman_vm_fini(struct radeon_device *rdev)
{
}
2467 
2468 /**
2469  * cayman_vm_decode_fault - print human readable fault info
2470  *
2471  * @rdev: radeon_device pointer
2472  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
2473  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
2474  *
2475  * Print human readable fault information (cayman/TN).
2476  */
2477 void cayman_vm_decode_fault(struct radeon_device *rdev,
2478 			    u32 status, u32 addr)
2479 {
2480 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
2481 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
2482 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
2483 	char *block;
2484 
2485 	switch (mc_id) {
2486 	case 32:
2487 	case 16:
2488 	case 96:
2489 	case 80:
2490 	case 160:
2491 	case 144:
2492 	case 224:
2493 	case 208:
2494 		block = "CB";
2495 		break;
2496 	case 33:
2497 	case 17:
2498 	case 97:
2499 	case 81:
2500 	case 161:
2501 	case 145:
2502 	case 225:
2503 	case 209:
2504 		block = "CB_FMASK";
2505 		break;
2506 	case 34:
2507 	case 18:
2508 	case 98:
2509 	case 82:
2510 	case 162:
2511 	case 146:
2512 	case 226:
2513 	case 210:
2514 		block = "CB_CMASK";
2515 		break;
2516 	case 35:
2517 	case 19:
2518 	case 99:
2519 	case 83:
2520 	case 163:
2521 	case 147:
2522 	case 227:
2523 	case 211:
2524 		block = "CB_IMMED";
2525 		break;
2526 	case 36:
2527 	case 20:
2528 	case 100:
2529 	case 84:
2530 	case 164:
2531 	case 148:
2532 	case 228:
2533 	case 212:
2534 		block = "DB";
2535 		break;
2536 	case 37:
2537 	case 21:
2538 	case 101:
2539 	case 85:
2540 	case 165:
2541 	case 149:
2542 	case 229:
2543 	case 213:
2544 		block = "DB_HTILE";
2545 		break;
2546 	case 38:
2547 	case 22:
2548 	case 102:
2549 	case 86:
2550 	case 166:
2551 	case 150:
2552 	case 230:
2553 	case 214:
2554 		block = "SX";
2555 		break;
2556 	case 39:
2557 	case 23:
2558 	case 103:
2559 	case 87:
2560 	case 167:
2561 	case 151:
2562 	case 231:
2563 	case 215:
2564 		block = "DB_STEN";
2565 		break;
2566 	case 40:
2567 	case 24:
2568 	case 104:
2569 	case 88:
2570 	case 232:
2571 	case 216:
2572 	case 168:
2573 	case 152:
2574 		block = "TC_TFETCH";
2575 		break;
2576 	case 41:
2577 	case 25:
2578 	case 105:
2579 	case 89:
2580 	case 233:
2581 	case 217:
2582 	case 169:
2583 	case 153:
2584 		block = "TC_VFETCH";
2585 		break;
2586 	case 42:
2587 	case 26:
2588 	case 106:
2589 	case 90:
2590 	case 234:
2591 	case 218:
2592 	case 170:
2593 	case 154:
2594 		block = "VC";
2595 		break;
2596 	case 112:
2597 		block = "CP";
2598 		break;
2599 	case 113:
2600 	case 114:
2601 		block = "SH";
2602 		break;
2603 	case 115:
2604 		block = "VGT";
2605 		break;
2606 	case 178:
2607 		block = "IH";
2608 		break;
2609 	case 51:
2610 		block = "RLC";
2611 		break;
2612 	case 55:
2613 		block = "DMA";
2614 		break;
2615 	case 56:
2616 		block = "HDP";
2617 		break;
2618 	default:
2619 		block = "unknown";
2620 		break;
2621 	}
2622 
2623 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
2624 	       protections, vmid, addr,
2625 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
2626 	       block, mc_id);
2627 }
2628 
2629 #define R600_ENTRY_VALID   (1 << 0)
2630 #define R600_PTE_SYSTEM    (1 << 1)
2631 #define R600_PTE_SNOOPED   (1 << 2)
2632 #define R600_PTE_READABLE  (1 << 5)
2633 #define R600_PTE_WRITEABLE (1 << 6)
2634 
2635 uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags)
2636 {
2637 	uint32_t r600_flags = 0;
2638 	r600_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_ENTRY_VALID : 0;
2639 	r600_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
2640 	r600_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
2641 	if (flags & RADEON_VM_PAGE_SYSTEM) {
2642 		r600_flags |= R600_PTE_SYSTEM;
2643 		r600_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
2644 	}
2645 	return r600_flags;
2646 }
2647 
2648 /**
2649  * cayman_vm_set_page - update the page tables using the CP
2650  *
2651  * @rdev: radeon_device pointer
2652  * @ib: indirect buffer to fill with commands
2653  * @pe: addr of the page entry
2654  * @addr: dst addr to write into pe
2655  * @count: number of page entries to update
2656  * @incr: increase next addr by incr bytes
2657  * @flags: access flags
2658  *
2659  * Update the page tables using the CP (cayman/TN).
2660  */
2661 void cayman_vm_set_page(struct radeon_device *rdev,
2662 			struct radeon_ib *ib,
2663 			uint64_t pe,
2664 			uint64_t addr, unsigned count,
2665 			uint32_t incr, uint32_t flags)
2666 {
2667 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
2668 	uint64_t value;
2669 	unsigned ndw;
2670 
2671 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
2672 		while (count) {
2673 			ndw = 1 + count * 2;
2674 			if (ndw > 0x3FFF)
2675 				ndw = 0x3FFF;
2676 
2677 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw);
2678 			ib->ptr[ib->length_dw++] = pe;
2679 			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2680 			for (; ndw > 1; ndw -= 2, --count, pe += 8) {
2681 				if (flags & RADEON_VM_PAGE_SYSTEM) {
2682 					value = radeon_vm_map_gart(rdev, addr);
2683 					value &= 0xFFFFFFFFFFFFF000ULL;
2684 				} else if (flags & RADEON_VM_PAGE_VALID) {
2685 					value = addr;
2686 				} else {
2687 					value = 0;
2688 				}
2689 				addr += incr;
2690 				value |= r600_flags;
2691 				ib->ptr[ib->length_dw++] = value;
2692 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
2693 			}
2694 		}
2695 	} else {
2696 		if ((flags & RADEON_VM_PAGE_SYSTEM) ||
2697 		    (count == 1)) {
2698 			while (count) {
2699 				ndw = count * 2;
2700 				if (ndw > 0xFFFFE)
2701 					ndw = 0xFFFFE;
2702 
2703 				/* for non-physically contiguous pages (system) */
2704 				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
2705 				ib->ptr[ib->length_dw++] = pe;
2706 				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2707 				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
2708 					if (flags & RADEON_VM_PAGE_SYSTEM) {
2709 						value = radeon_vm_map_gart(rdev, addr);
2710 						value &= 0xFFFFFFFFFFFFF000ULL;
2711 					} else if (flags & RADEON_VM_PAGE_VALID) {
2712 						value = addr;
2713 					} else {
2714 						value = 0;
2715 					}
2716 					addr += incr;
2717 					value |= r600_flags;
2718 					ib->ptr[ib->length_dw++] = value;
2719 					ib->ptr[ib->length_dw++] = upper_32_bits(value);
2720 				}
2721 			}
2722 			while (ib->length_dw & 0x7)
2723 				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
2724 		} else {
2725 			while (count) {
2726 				ndw = count * 2;
2727 				if (ndw > 0xFFFFE)
2728 					ndw = 0xFFFFE;
2729 
2730 				if (flags & RADEON_VM_PAGE_VALID)
2731 					value = addr;
2732 				else
2733 					value = 0;
2734 				/* for physically contiguous pages (vram) */
2735 				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
2736 				ib->ptr[ib->length_dw++] = pe; /* dst addr */
2737 				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
2738 				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
2739 				ib->ptr[ib->length_dw++] = 0;
2740 				ib->ptr[ib->length_dw++] = value; /* value */
2741 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
2742 				ib->ptr[ib->length_dw++] = incr; /* increment size */
2743 				ib->ptr[ib->length_dw++] = 0;
2744 				pe += ndw * 4;
2745 				addr += (ndw / 2) * incr;
2746 				count -= ndw / 2;
2747 			}
2748 		}
2749 		while (ib->length_dw & 0x7)
2750 			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
2751 	}
2752 }
2753 
2754 /**
2755  * cayman_vm_flush - vm flush using the CP
2756  *
2757  * @rdev: radeon_device pointer
2758  *
2759  * Update the page table base and flush the VM TLB
2760  * using the CP (cayman-si).
2761  */
2762 void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2763 {
2764 	struct radeon_ring *ring = &rdev->ring[ridx];
2765 
2766 	if (vm == NULL)
2767 		return;
2768 
2769 	radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
2770 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
2771 
2772 	/* flush hdp cache */
2773 	radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
2774 	radeon_ring_write(ring, 0x1);
2775 
2776 	/* bits 0-7 are the VM contexts0-7 */
2777 	radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
2778 	radeon_ring_write(ring, 1 << vm->id);
2779 
2780 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
2781 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
2782 	radeon_ring_write(ring, 0x0);
2783 }
2784 
/**
 * cayman_dma_vm_flush - vm flush using the async DMA engine
 *
 * @rdev: radeon_device pointer
 * @ridx: ring index to emit the flush on
 * @vm: VM whose TLB entries should be flushed (may be NULL = no-op)
 *
 * Update the page table base and flush the VM TLB by emitting
 * SRBM register writes on the DMA ring (cayman/TN).
 */
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* point the per-VM context at its page directory */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
	radeon_ring_write(ring, 1);

	/* bits 0-7 are the VM contexts0-7 */
	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
	radeon_ring_write(ring, 1 << vm->id);
}
2806 
2807