xref: /dragonfly/sys/dev/drm/radeon/cik.c (revision b0d289c2)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/module.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "radeon_asic.h"
29 #include "cikd.h"
30 #include "atom.h"
31 #include "cik_blit_shaders.h"
32 
33 /* GFX */
34 #define CIK_PFP_UCODE_SIZE 2144
35 #define CIK_ME_UCODE_SIZE 2144
36 #define CIK_CE_UCODE_SIZE 2144
37 /* compute */
38 #define CIK_MEC_UCODE_SIZE 4192
39 /* interrupts */
40 #define BONAIRE_RLC_UCODE_SIZE 2048
41 #define KB_RLC_UCODE_SIZE 2560
42 #define KV_RLC_UCODE_SIZE 2560
43 /* gddr controller */
44 #define CIK_MC_UCODE_SIZE 7866
45 /* sdma */
46 #define CIK_SDMA_UCODE_SIZE 1050
47 #define CIK_SDMA_UCODE_VERSION 64
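/* The *_UCODE_SIZE values above are dword counts; cik_init_microcode()
 * multiplies them by 4 when validating the size of each firmware image. */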
48 
49 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
50 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
54 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
55 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
59 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
60 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
61 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
62 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
63 MODULE_FIRMWARE("radeon/KABINI_me.bin");
64 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
65 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
66 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
67 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
68 
69 static void cik_rlc_stop(struct radeon_device *rdev);
70 
71 /*
72  * Indirect register accessors
73  */
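/*
 * PCIE registers are reached through an index/data pair: the register
 * offset is written to PCIE_INDEX and the payload is then read from or
 * written to PCIE_DATA.  The discarded readbacks below force the posted
 * index/data writes to complete before the next access.
 */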
74 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
75 {
76 	u32 r;
77 
78 	WREG32(PCIE_INDEX, reg);
79 	(void)RREG32(PCIE_INDEX);
80 	r = RREG32(PCIE_DATA);
81 	return r;
82 }
83 
84 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
85 {
86 	WREG32(PCIE_INDEX, reg);
87 	(void)RREG32(PCIE_INDEX);
88 	WREG32(PCIE_DATA, v);
89 	(void)RREG32(PCIE_DATA);
90 }
91 
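/*
 * Golden register tables: each entry is an {offset, mask, value} triplet.
 * cik_init_golden_registers() feeds these to radeon_program_register_sequence(),
 * which read-modify-writes each register (a mask of 0xffffffff writes the
 * value directly).
 */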
92 static const u32 bonaire_golden_spm_registers[] =
93 {
94 	0x30800, 0xe0ffffff, 0xe0000000
95 };
96 
97 static const u32 bonaire_golden_common_registers[] =
98 {
99 	0xc770, 0xffffffff, 0x00000800,
100 	0xc774, 0xffffffff, 0x00000800,
101 	0xc798, 0xffffffff, 0x00007fbf,
102 	0xc79c, 0xffffffff, 0x00007faf
103 };
104 
105 static const u32 bonaire_golden_registers[] =
106 {
107 	0x3354, 0x00000333, 0x00000333,
108 	0x3350, 0x000c0fc0, 0x00040200,
109 	0x9a10, 0x00010000, 0x00058208,
110 	0x3c000, 0xffff1fff, 0x00140000,
111 	0x3c200, 0xfdfc0fff, 0x00000100,
112 	0x3c234, 0x40000000, 0x40000200,
113 	0x9830, 0xffffffff, 0x00000000,
114 	0x9834, 0xf00fffff, 0x00000400,
115 	0x9838, 0x0002021c, 0x00020200,
116 	0xc78, 0x00000080, 0x00000000,
117 	0x5bb0, 0x000000f0, 0x00000070,
118 	0x5bc0, 0xf0311fff, 0x80300000,
119 	0x98f8, 0x73773777, 0x12010001,
120 	0x350c, 0x00810000, 0x408af000,
121 	0x7030, 0x31000111, 0x00000011,
122 	0x2f48, 0x73773777, 0x12010001,
123 	0x220c, 0x00007fb6, 0x0021a1b1,
124 	0x2210, 0x00007fb6, 0x002021b1,
125 	0x2180, 0x00007fb6, 0x00002191,
126 	0x2218, 0x00007fb6, 0x002121b1,
127 	0x221c, 0x00007fb6, 0x002021b1,
128 	0x21dc, 0x00007fb6, 0x00002191,
129 	0x21e0, 0x00007fb6, 0x00002191,
130 	0x3628, 0x0000003f, 0x0000000a,
131 	0x362c, 0x0000003f, 0x0000000a,
132 	0x2ae4, 0x00073ffe, 0x000022a2,
133 	0x240c, 0x000007ff, 0x00000000,
134 	0x8a14, 0xf000003f, 0x00000007,
135 	0x8bf0, 0x00002001, 0x00000001,
136 	0x8b24, 0xffffffff, 0x00ffffff,
137 	0x30a04, 0x0000ff0f, 0x00000000,
138 	0x28a4c, 0x07ffffff, 0x06000000,
139 	0x4d8, 0x00000fff, 0x00000100,
140 	0x3e78, 0x00000001, 0x00000002,
141 	0x9100, 0x03000000, 0x0362c688,
142 	0x8c00, 0x000000ff, 0x00000001,
143 	0xe40, 0x00001fff, 0x00001fff,
144 	0x9060, 0x0000007f, 0x00000020,
145 	0x9508, 0x00010000, 0x00010000,
146 	0xac14, 0x000003ff, 0x000000f3,
147 	0xac0c, 0xffffffff, 0x00001032
148 };
149 
150 static const u32 bonaire_mgcg_cgcg_init[] =
151 {
152 	0xc420, 0xffffffff, 0xfffffffc,
153 	0x30800, 0xffffffff, 0xe0000000,
154 	0x3c2a0, 0xffffffff, 0x00000100,
155 	0x3c208, 0xffffffff, 0x00000100,
156 	0x3c2c0, 0xffffffff, 0xc0000100,
157 	0x3c2c8, 0xffffffff, 0xc0000100,
158 	0x3c2c4, 0xffffffff, 0xc0000100,
159 	0x55e4, 0xffffffff, 0x00600100,
160 	0x3c280, 0xffffffff, 0x00000100,
161 	0x3c214, 0xffffffff, 0x06000100,
162 	0x3c220, 0xffffffff, 0x00000100,
163 	0x3c218, 0xffffffff, 0x06000100,
164 	0x3c204, 0xffffffff, 0x00000100,
165 	0x3c2e0, 0xffffffff, 0x00000100,
166 	0x3c224, 0xffffffff, 0x00000100,
167 	0x3c200, 0xffffffff, 0x00000100,
168 	0x3c230, 0xffffffff, 0x00000100,
169 	0x3c234, 0xffffffff, 0x00000100,
170 	0x3c250, 0xffffffff, 0x00000100,
171 	0x3c254, 0xffffffff, 0x00000100,
172 	0x3c258, 0xffffffff, 0x00000100,
173 	0x3c25c, 0xffffffff, 0x00000100,
174 	0x3c260, 0xffffffff, 0x00000100,
175 	0x3c27c, 0xffffffff, 0x00000100,
176 	0x3c278, 0xffffffff, 0x00000100,
177 	0x3c210, 0xffffffff, 0x06000100,
178 	0x3c290, 0xffffffff, 0x00000100,
179 	0x3c274, 0xffffffff, 0x00000100,
180 	0x3c2b4, 0xffffffff, 0x00000100,
181 	0x3c2b0, 0xffffffff, 0x00000100,
182 	0x3c270, 0xffffffff, 0x00000100,
183 	0x30800, 0xffffffff, 0xe0000000,
184 	0x3c020, 0xffffffff, 0x00010000,
185 	0x3c024, 0xffffffff, 0x00030002,
186 	0x3c028, 0xffffffff, 0x00040007,
187 	0x3c02c, 0xffffffff, 0x00060005,
188 	0x3c030, 0xffffffff, 0x00090008,
189 	0x3c034, 0xffffffff, 0x00010000,
190 	0x3c038, 0xffffffff, 0x00030002,
191 	0x3c03c, 0xffffffff, 0x00040007,
192 	0x3c040, 0xffffffff, 0x00060005,
193 	0x3c044, 0xffffffff, 0x00090008,
194 	0x3c048, 0xffffffff, 0x00010000,
195 	0x3c04c, 0xffffffff, 0x00030002,
196 	0x3c050, 0xffffffff, 0x00040007,
197 	0x3c054, 0xffffffff, 0x00060005,
198 	0x3c058, 0xffffffff, 0x00090008,
199 	0x3c05c, 0xffffffff, 0x00010000,
200 	0x3c060, 0xffffffff, 0x00030002,
201 	0x3c064, 0xffffffff, 0x00040007,
202 	0x3c068, 0xffffffff, 0x00060005,
203 	0x3c06c, 0xffffffff, 0x00090008,
204 	0x3c070, 0xffffffff, 0x00010000,
205 	0x3c074, 0xffffffff, 0x00030002,
206 	0x3c078, 0xffffffff, 0x00040007,
207 	0x3c07c, 0xffffffff, 0x00060005,
208 	0x3c080, 0xffffffff, 0x00090008,
209 	0x3c084, 0xffffffff, 0x00010000,
210 	0x3c088, 0xffffffff, 0x00030002,
211 	0x3c08c, 0xffffffff, 0x00040007,
212 	0x3c090, 0xffffffff, 0x00060005,
213 	0x3c094, 0xffffffff, 0x00090008,
214 	0x3c098, 0xffffffff, 0x00010000,
215 	0x3c09c, 0xffffffff, 0x00030002,
216 	0x3c0a0, 0xffffffff, 0x00040007,
217 	0x3c0a4, 0xffffffff, 0x00060005,
218 	0x3c0a8, 0xffffffff, 0x00090008,
219 	0x3c000, 0xffffffff, 0x96e00200,
220 	0x8708, 0xffffffff, 0x00900100,
221 	0xc424, 0xffffffff, 0x0020003f,
222 	0x38, 0xffffffff, 0x0140001c,
223 	0x3c, 0x000f0000, 0x000f0000,
224 	0x220, 0xffffffff, 0xC060000C,
225 	0x224, 0xc0000fff, 0x00000100,
226 	0xf90, 0xffffffff, 0x00000100,
227 	0xf98, 0x00000101, 0x00000000,
228 	0x20a8, 0xffffffff, 0x00000104,
229 	0x55e4, 0xff000fff, 0x00000100,
230 	0x30cc, 0xc0000fff, 0x00000104,
231 	0xc1e4, 0x00000001, 0x00000001,
232 	0xd00c, 0xff000ff0, 0x00000100,
233 	0xd80c, 0xff000ff0, 0x00000100
234 };
235 
236 static const u32 spectre_golden_spm_registers[] =
237 {
238 	0x30800, 0xe0ffffff, 0xe0000000
239 };
240 
241 static const u32 spectre_golden_common_registers[] =
242 {
243 	0xc770, 0xffffffff, 0x00000800,
244 	0xc774, 0xffffffff, 0x00000800,
245 	0xc798, 0xffffffff, 0x00007fbf,
246 	0xc79c, 0xffffffff, 0x00007faf
247 };
248 
249 static const u32 spectre_golden_registers[] =
250 {
251 	0x3c000, 0xffff1fff, 0x96940200,
252 	0x3c00c, 0xffff0001, 0xff000000,
253 	0x3c200, 0xfffc0fff, 0x00000100,
254 	0x6ed8, 0x00010101, 0x00010000,
255 	0x9834, 0xf00fffff, 0x00000400,
256 	0x9838, 0xfffffffc, 0x00020200,
257 	0x5bb0, 0x000000f0, 0x00000070,
258 	0x5bc0, 0xf0311fff, 0x80300000,
259 	0x98f8, 0x73773777, 0x12010001,
260 	0x9b7c, 0x00ff0000, 0x00fc0000,
261 	0x2f48, 0x73773777, 0x12010001,
262 	0x8a14, 0xf000003f, 0x00000007,
263 	0x8b24, 0xffffffff, 0x00ffffff,
264 	0x28350, 0x3f3f3fff, 0x00000082,
265 	0x28355, 0x0000003f, 0x00000000,
266 	0x3e78, 0x00000001, 0x00000002,
267 	0x913c, 0xffff03df, 0x00000004,
268 	0xc768, 0x00000008, 0x00000008,
269 	0x8c00, 0x000008ff, 0x00000800,
270 	0x9508, 0x00010000, 0x00010000,
271 	0xac0c, 0xffffffff, 0x54763210,
272 	0x214f8, 0x01ff01ff, 0x00000002,
273 	0x21498, 0x007ff800, 0x00200000,
274 	0x2015c, 0xffffffff, 0x00000f40,
275 	0x30934, 0xffffffff, 0x00000001
276 };
277 
278 static const u32 spectre_mgcg_cgcg_init[] =
279 {
280 	0xc420, 0xffffffff, 0xfffffffc,
281 	0x30800, 0xffffffff, 0xe0000000,
282 	0x3c2a0, 0xffffffff, 0x00000100,
283 	0x3c208, 0xffffffff, 0x00000100,
284 	0x3c2c0, 0xffffffff, 0x00000100,
285 	0x3c2c8, 0xffffffff, 0x00000100,
286 	0x3c2c4, 0xffffffff, 0x00000100,
287 	0x55e4, 0xffffffff, 0x00600100,
288 	0x3c280, 0xffffffff, 0x00000100,
289 	0x3c214, 0xffffffff, 0x06000100,
290 	0x3c220, 0xffffffff, 0x00000100,
291 	0x3c218, 0xffffffff, 0x06000100,
292 	0x3c204, 0xffffffff, 0x00000100,
293 	0x3c2e0, 0xffffffff, 0x00000100,
294 	0x3c224, 0xffffffff, 0x00000100,
295 	0x3c200, 0xffffffff, 0x00000100,
296 	0x3c230, 0xffffffff, 0x00000100,
297 	0x3c234, 0xffffffff, 0x00000100,
298 	0x3c250, 0xffffffff, 0x00000100,
299 	0x3c254, 0xffffffff, 0x00000100,
300 	0x3c258, 0xffffffff, 0x00000100,
301 	0x3c25c, 0xffffffff, 0x00000100,
302 	0x3c260, 0xffffffff, 0x00000100,
303 	0x3c27c, 0xffffffff, 0x00000100,
304 	0x3c278, 0xffffffff, 0x00000100,
305 	0x3c210, 0xffffffff, 0x06000100,
306 	0x3c290, 0xffffffff, 0x00000100,
307 	0x3c274, 0xffffffff, 0x00000100,
308 	0x3c2b4, 0xffffffff, 0x00000100,
309 	0x3c2b0, 0xffffffff, 0x00000100,
310 	0x3c270, 0xffffffff, 0x00000100,
311 	0x30800, 0xffffffff, 0xe0000000,
312 	0x3c020, 0xffffffff, 0x00010000,
313 	0x3c024, 0xffffffff, 0x00030002,
314 	0x3c028, 0xffffffff, 0x00040007,
315 	0x3c02c, 0xffffffff, 0x00060005,
316 	0x3c030, 0xffffffff, 0x00090008,
317 	0x3c034, 0xffffffff, 0x00010000,
318 	0x3c038, 0xffffffff, 0x00030002,
319 	0x3c03c, 0xffffffff, 0x00040007,
320 	0x3c040, 0xffffffff, 0x00060005,
321 	0x3c044, 0xffffffff, 0x00090008,
322 	0x3c048, 0xffffffff, 0x00010000,
323 	0x3c04c, 0xffffffff, 0x00030002,
324 	0x3c050, 0xffffffff, 0x00040007,
325 	0x3c054, 0xffffffff, 0x00060005,
326 	0x3c058, 0xffffffff, 0x00090008,
327 	0x3c05c, 0xffffffff, 0x00010000,
328 	0x3c060, 0xffffffff, 0x00030002,
329 	0x3c064, 0xffffffff, 0x00040007,
330 	0x3c068, 0xffffffff, 0x00060005,
331 	0x3c06c, 0xffffffff, 0x00090008,
332 	0x3c070, 0xffffffff, 0x00010000,
333 	0x3c074, 0xffffffff, 0x00030002,
334 	0x3c078, 0xffffffff, 0x00040007,
335 	0x3c07c, 0xffffffff, 0x00060005,
336 	0x3c080, 0xffffffff, 0x00090008,
337 	0x3c084, 0xffffffff, 0x00010000,
338 	0x3c088, 0xffffffff, 0x00030002,
339 	0x3c08c, 0xffffffff, 0x00040007,
340 	0x3c090, 0xffffffff, 0x00060005,
341 	0x3c094, 0xffffffff, 0x00090008,
342 	0x3c098, 0xffffffff, 0x00010000,
343 	0x3c09c, 0xffffffff, 0x00030002,
344 	0x3c0a0, 0xffffffff, 0x00040007,
345 	0x3c0a4, 0xffffffff, 0x00060005,
346 	0x3c0a8, 0xffffffff, 0x00090008,
347 	0x3c0ac, 0xffffffff, 0x00010000,
348 	0x3c0b0, 0xffffffff, 0x00030002,
349 	0x3c0b4, 0xffffffff, 0x00040007,
350 	0x3c0b8, 0xffffffff, 0x00060005,
351 	0x3c0bc, 0xffffffff, 0x00090008,
352 	0x3c000, 0xffffffff, 0x96e00200,
353 	0x8708, 0xffffffff, 0x00900100,
354 	0xc424, 0xffffffff, 0x0020003f,
355 	0x38, 0xffffffff, 0x0140001c,
356 	0x3c, 0x000f0000, 0x000f0000,
357 	0x220, 0xffffffff, 0xC060000C,
358 	0x224, 0xc0000fff, 0x00000100,
359 	0xf90, 0xffffffff, 0x00000100,
360 	0xf98, 0x00000101, 0x00000000,
361 	0x20a8, 0xffffffff, 0x00000104,
362 	0x55e4, 0xff000fff, 0x00000100,
363 	0x30cc, 0xc0000fff, 0x00000104,
364 	0xc1e4, 0x00000001, 0x00000001,
365 	0xd00c, 0xff000ff0, 0x00000100,
366 	0xd80c, 0xff000ff0, 0x00000100
367 };
368 
369 static const u32 kalindi_golden_spm_registers[] =
370 {
371 	0x30800, 0xe0ffffff, 0xe0000000
372 };
373 
374 static const u32 kalindi_golden_common_registers[] =
375 {
376 	0xc770, 0xffffffff, 0x00000800,
377 	0xc774, 0xffffffff, 0x00000800,
378 	0xc798, 0xffffffff, 0x00007fbf,
379 	0xc79c, 0xffffffff, 0x00007faf
380 };
381 
382 static const u32 kalindi_golden_registers[] =
383 {
384 	0x3c000, 0xffffdfff, 0x6e944040,
385 	0x55e4, 0xff607fff, 0xfc000100,
386 	0x3c220, 0xff000fff, 0x00000100,
387 	0x3c224, 0xff000fff, 0x00000100,
388 	0x3c200, 0xfffc0fff, 0x00000100,
389 	0x6ed8, 0x00010101, 0x00010000,
390 	0x9830, 0xffffffff, 0x00000000,
391 	0x9834, 0xf00fffff, 0x00000400,
392 	0x5bb0, 0x000000f0, 0x00000070,
393 	0x5bc0, 0xf0311fff, 0x80300000,
394 	0x98f8, 0x73773777, 0x12010001,
395 	0x98fc, 0xffffffff, 0x00000010,
396 	0x9b7c, 0x00ff0000, 0x00fc0000,
397 	0x8030, 0x00001f0f, 0x0000100a,
398 	0x2f48, 0x73773777, 0x12010001,
399 	0x2408, 0x000fffff, 0x000c007f,
400 	0x8a14, 0xf000003f, 0x00000007,
401 	0x8b24, 0x3fff3fff, 0x00ffcfff,
402 	0x30a04, 0x0000ff0f, 0x00000000,
403 	0x28a4c, 0x07ffffff, 0x06000000,
404 	0x4d8, 0x00000fff, 0x00000100,
405 	0x3e78, 0x00000001, 0x00000002,
406 	0xc768, 0x00000008, 0x00000008,
407 	0x8c00, 0x000000ff, 0x00000003,
408 	0x214f8, 0x01ff01ff, 0x00000002,
409 	0x21498, 0x007ff800, 0x00200000,
410 	0x2015c, 0xffffffff, 0x00000f40,
411 	0x88c4, 0x001f3ae3, 0x00000082,
412 	0x88d4, 0x0000001f, 0x00000010,
413 	0x30934, 0xffffffff, 0x00000000
414 };
415 
416 static const u32 kalindi_mgcg_cgcg_init[] =
417 {
418 	0xc420, 0xffffffff, 0xfffffffc,
419 	0x30800, 0xffffffff, 0xe0000000,
420 	0x3c2a0, 0xffffffff, 0x00000100,
421 	0x3c208, 0xffffffff, 0x00000100,
422 	0x3c2c0, 0xffffffff, 0x00000100,
423 	0x3c2c8, 0xffffffff, 0x00000100,
424 	0x3c2c4, 0xffffffff, 0x00000100,
425 	0x55e4, 0xffffffff, 0x00600100,
426 	0x3c280, 0xffffffff, 0x00000100,
427 	0x3c214, 0xffffffff, 0x06000100,
428 	0x3c220, 0xffffffff, 0x00000100,
429 	0x3c218, 0xffffffff, 0x06000100,
430 	0x3c204, 0xffffffff, 0x00000100,
431 	0x3c2e0, 0xffffffff, 0x00000100,
432 	0x3c224, 0xffffffff, 0x00000100,
433 	0x3c200, 0xffffffff, 0x00000100,
434 	0x3c230, 0xffffffff, 0x00000100,
435 	0x3c234, 0xffffffff, 0x00000100,
436 	0x3c250, 0xffffffff, 0x00000100,
437 	0x3c254, 0xffffffff, 0x00000100,
438 	0x3c258, 0xffffffff, 0x00000100,
439 	0x3c25c, 0xffffffff, 0x00000100,
440 	0x3c260, 0xffffffff, 0x00000100,
441 	0x3c27c, 0xffffffff, 0x00000100,
442 	0x3c278, 0xffffffff, 0x00000100,
443 	0x3c210, 0xffffffff, 0x06000100,
444 	0x3c290, 0xffffffff, 0x00000100,
445 	0x3c274, 0xffffffff, 0x00000100,
446 	0x3c2b4, 0xffffffff, 0x00000100,
447 	0x3c2b0, 0xffffffff, 0x00000100,
448 	0x3c270, 0xffffffff, 0x00000100,
449 	0x30800, 0xffffffff, 0xe0000000,
450 	0x3c020, 0xffffffff, 0x00010000,
451 	0x3c024, 0xffffffff, 0x00030002,
452 	0x3c028, 0xffffffff, 0x00040007,
453 	0x3c02c, 0xffffffff, 0x00060005,
454 	0x3c030, 0xffffffff, 0x00090008,
455 	0x3c034, 0xffffffff, 0x00010000,
456 	0x3c038, 0xffffffff, 0x00030002,
457 	0x3c03c, 0xffffffff, 0x00040007,
458 	0x3c040, 0xffffffff, 0x00060005,
459 	0x3c044, 0xffffffff, 0x00090008,
460 	0x3c000, 0xffffffff, 0x96e00200,
461 	0x8708, 0xffffffff, 0x00900100,
462 	0xc424, 0xffffffff, 0x0020003f,
463 	0x38, 0xffffffff, 0x0140001c,
464 	0x3c, 0x000f0000, 0x000f0000,
465 	0x220, 0xffffffff, 0xC060000C,
466 	0x224, 0xc0000fff, 0x00000100,
467 	0x20a8, 0xffffffff, 0x00000104,
468 	0x55e4, 0xff000fff, 0x00000100,
469 	0x30cc, 0xc0000fff, 0x00000104,
470 	0xc1e4, 0x00000001, 0x00000001,
471 	0xd00c, 0xff000ff0, 0x00000100,
472 	0xd80c, 0xff000ff0, 0x00000100
473 };
474 
475 static void cik_init_golden_registers(struct radeon_device *rdev)
476 {
477 	switch (rdev->family) {
478 	case CHIP_BONAIRE:
479 		radeon_program_register_sequence(rdev,
480 						 bonaire_mgcg_cgcg_init,
481 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
482 		radeon_program_register_sequence(rdev,
483 						 bonaire_golden_registers,
484 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
485 		radeon_program_register_sequence(rdev,
486 						 bonaire_golden_common_registers,
487 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
488 		radeon_program_register_sequence(rdev,
489 						 bonaire_golden_spm_registers,
490 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
491 		break;
492 	case CHIP_KABINI:
493 		radeon_program_register_sequence(rdev,
494 						 kalindi_mgcg_cgcg_init,
495 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
496 		radeon_program_register_sequence(rdev,
497 						 kalindi_golden_registers,
498 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
499 		radeon_program_register_sequence(rdev,
500 						 kalindi_golden_common_registers,
501 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
502 		radeon_program_register_sequence(rdev,
503 						 kalindi_golden_spm_registers,
504 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
505 		break;
506 	case CHIP_KAVERI:
507 		radeon_program_register_sequence(rdev,
508 						 spectre_mgcg_cgcg_init,
509 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
510 		radeon_program_register_sequence(rdev,
511 						 spectre_golden_registers,
512 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
513 		radeon_program_register_sequence(rdev,
514 						 spectre_golden_common_registers,
515 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
516 		radeon_program_register_sequence(rdev,
517 						 spectre_golden_spm_registers,
518 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
519 		break;
520 	default:
521 		break;
522 	}
523 }
524 
525 /**
526  * cik_get_xclk - get the xclk
527  *
528  * @rdev: radeon_device pointer
529  *
530  * Returns the reference clock used by the gfx engine
531  * (CIK).
532  */
533 u32 cik_get_xclk(struct radeon_device *rdev)
534 {
535 	u32 reference_clock = rdev->clock.spll.reference_freq;
536 
537 	if (rdev->flags & RADEON_IS_IGP) {
538 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
539 			return reference_clock / 2;
540 	} else {
541 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
542 			return reference_clock / 4;
543 	}
544 	return reference_clock;
545 }
546 
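/*
 * Doorbell aperture accessors.  The doorbell aperture is a separate PCI
 * BAR used to hand ring write pointers (e.g. for the MEC compute queues)
 * to the GPU.
 */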
547 /**
548  * cik_mm_rdoorbell - read a doorbell dword
549  *
550  * @rdev: radeon_device pointer
551  * @offset: byte offset into the aperture
552  *
553  * Returns the value in the doorbell aperture at the
554  * requested offset (CIK).
555  */
556 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
557 {
558 	if (offset < rdev->doorbell.size) {
559 		return readl(((uint8_t __iomem *)rdev->doorbell.ptr) + offset);
560 	} else {
561 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
562 		return 0;
563 	}
564 }
565 
566 /**
567  * cik_mm_wdoorbell - write a doorbell dword
568  *
569  * @rdev: radeon_device pointer
570  * @offset: byte offset into the aperture
571  * @v: value to write
572  *
573  * Writes @v to the doorbell aperture at the
574  * requested offset (CIK).
575  */
576 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
577 {
578 	if (offset < rdev->doorbell.size) {
579 		writel(v, ((uint8_t __iomem *)rdev->doorbell.ptr) + offset);
580 	} else {
581 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
582 	}
583 }
584 
585 #define BONAIRE_IO_MC_REGS_SIZE 36
586 
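/*
 * MC (memory controller) I/O init table: each pair is written to
 * MC_SEQ_IO_DEBUG_INDEX / MC_SEQ_IO_DEBUG_DATA by ci_mc_load_microcode()
 * before the MC ucode itself is uploaded.
 */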
587 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
588 {
589 	{0x00000070, 0x04400000},
590 	{0x00000071, 0x80c01803},
591 	{0x00000072, 0x00004004},
592 	{0x00000073, 0x00000100},
593 	{0x00000074, 0x00ff0000},
594 	{0x00000075, 0x34000000},
595 	{0x00000076, 0x08000014},
596 	{0x00000077, 0x00cc08ec},
597 	{0x00000078, 0x00000400},
598 	{0x00000079, 0x00000000},
599 	{0x0000007a, 0x04090000},
600 	{0x0000007c, 0x00000000},
601 	{0x0000007e, 0x4408a8e8},
602 	{0x0000007f, 0x00000304},
603 	{0x00000080, 0x00000000},
604 	{0x00000082, 0x00000001},
605 	{0x00000083, 0x00000002},
606 	{0x00000084, 0xf3e4f400},
607 	{0x00000085, 0x052024e3},
608 	{0x00000087, 0x00000000},
609 	{0x00000088, 0x01000000},
610 	{0x0000008a, 0x1c0a0000},
611 	{0x0000008b, 0xff010000},
612 	{0x0000008d, 0xffffefff},
613 	{0x0000008e, 0xfff3efff},
614 	{0x0000008f, 0xfff3efbf},
615 	{0x00000092, 0xf7ffffff},
616 	{0x00000093, 0xffffff7f},
617 	{0x00000095, 0x00101101},
618 	{0x00000096, 0x00000fff},
619 	{0x00000097, 0x00116fff},
620 	{0x00000098, 0x60010000},
621 	{0x00000099, 0x10010000},
622 	{0x0000009a, 0x00006000},
623 	{0x0000009b, 0x00001000},
624 	{0x0000009f, 0x00b48000}
625 };
626 
627 /**
628  * cik_srbm_select - select specific register instances
629  *
630  * @rdev: radeon_device pointer
631  * @me: selected ME (micro engine)
632  * @pipe: pipe
633  * @queue: queue
634  * @vmid: VMID
635  *
636  * Switches the currently active register instances.  Some
637  * registers are instanced per VMID, others are instanced per
638  * me/pipe/queue combination.
639  */
640 static void cik_srbm_select(struct radeon_device *rdev,
641 			    u32 me, u32 pipe, u32 queue, u32 vmid)
642 {
643 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
644 			     MEID(me & 0x3) |
645 			     VMID(vmid & 0xf) |
646 			     QUEUEID(queue & 0x7));
647 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
648 }
649 
650 /* ucode loading */
651 /**
652  * ci_mc_load_microcode - load MC ucode into the hw
653  *
654  * @rdev: radeon_device pointer
655  *
656  * Load the GDDR MC ucode into the hw (CIK).
657  * Returns 0 on success, error on failure.
658  */
659 static __unused int ci_mc_load_microcode(struct radeon_device *rdev)
660 {
661 	const __be32 *fw_data;
662 	u32 running, blackout = 0;
663 	u32 *io_mc_regs;
664 	int i, ucode_size, regs_size;
665 
666 	if (!rdev->mc_fw)
667 		return -EINVAL;
668 
669 	switch (rdev->family) {
670 	case CHIP_BONAIRE:
671 	default:
672 		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
673 		ucode_size = CIK_MC_UCODE_SIZE;
674 		regs_size = BONAIRE_IO_MC_REGS_SIZE;
675 		break;
676 	}
677 
678 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
679 
680 	if (running == 0) {
681 		if (running) {
682 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
683 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
684 		}
685 
686 		/* reset the engine and set to writable */
687 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
688 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
689 
690 		/* load mc io regs */
691 		for (i = 0; i < regs_size; i++) {
692 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
693 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
694 		}
695 		/* load the MC ucode */
696 		fw_data = (const __be32 *)rdev->mc_fw->data;
697 		for (i = 0; i < ucode_size; i++)
698 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
699 
700 		/* put the engine back into the active state */
701 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
702 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
703 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
704 
705 		/* wait for training to complete */
706 		for (i = 0; i < rdev->usec_timeout; i++) {
707 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
708 				break;
709 			udelay(1);
710 		}
711 		for (i = 0; i < rdev->usec_timeout; i++) {
712 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
713 				break;
714 			udelay(1);
715 		}
716 
717 		if (running)
718 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
719 	}
720 
721 	return 0;
722 }
723 
724 /**
725  * cik_init_microcode - load ucode images from disk
726  *
727  * @rdev: radeon_device pointer
728  *
729  * Use the firmware interface to load the ucode images into
730  * the driver (not loaded into hw).
731  * Returns 0 on success, error on failure.
732  */
733 static int cik_init_microcode(struct radeon_device *rdev)
734 {
735 	const char *chip_name;
736 	size_t pfp_req_size, me_req_size, ce_req_size,
737 		mec_req_size, rlc_req_size, mc_req_size,
738 		sdma_req_size;
739 	char fw_name[30];
740 	int err;
741 
742 	DRM_DEBUG("\n");
743 
744 	switch (rdev->family) {
745 	case CHIP_BONAIRE:
746 		chip_name = "BONAIRE";
747 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
748 		me_req_size = CIK_ME_UCODE_SIZE * 4;
749 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
750 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
751 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
752 		mc_req_size = CIK_MC_UCODE_SIZE * 4;
753 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
754 		break;
755 	case CHIP_KAVERI:
756 		chip_name = "KAVERI";
757 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
758 		me_req_size = CIK_ME_UCODE_SIZE * 4;
759 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
760 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
761 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
762 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
763 		break;
764 	case CHIP_KABINI:
765 		chip_name = "KABINI";
766 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
767 		me_req_size = CIK_ME_UCODE_SIZE * 4;
768 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
769 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
770 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
771 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
772 		break;
773 	default: BUG();
774 	}
775 
776 	DRM_INFO("Loading %s Microcode\n", chip_name);
777 
778 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
779 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
780 	if (err)
781 		goto out;
782 	if (rdev->pfp_fw->datasize != pfp_req_size) {
783 		printk(KERN_ERR
784 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
785 		       rdev->pfp_fw->datasize, fw_name);
786 		err = -EINVAL;
787 		goto out;
788 	}
789 
790 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
791 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
792 	if (err)
793 		goto out;
794 	if (rdev->me_fw->datasize != me_req_size) {
795 		printk(KERN_ERR
796 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
797 		       rdev->me_fw->datasize, fw_name);
798 		err = -EINVAL;
799 	}
800 
801 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
802 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
803 	if (err)
804 		goto out;
805 	if (rdev->ce_fw->datasize != ce_req_size) {
806 		printk(KERN_ERR
807 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
808 		       rdev->ce_fw->datasize, fw_name);
809 		err = -EINVAL;
810 	}
811 
812 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
813 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
814 	if (err)
815 		goto out;
816 	if (rdev->mec_fw->datasize != mec_req_size) {
817 		printk(KERN_ERR
818 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
819 		       rdev->mec_fw->datasize, fw_name);
820 		err = -EINVAL;
821 	}
822 
823 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
824 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
825 	if (err)
826 		goto out;
827 	if (rdev->rlc_fw->datasize != rlc_req_size) {
828 		printk(KERN_ERR
829 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
830 		       rdev->rlc_fw->datasize, fw_name);
831 		err = -EINVAL;
832 	}
833 
834 	ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
835 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
836 	if (err)
837 		goto out;
838 	if (rdev->sdma_fw->datasize != sdma_req_size) {
839 		printk(KERN_ERR
840 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
841 		       rdev->sdma_fw->datasize, fw_name);
842 		err = -EINVAL;
843 	}
844 
845 	/* No MC ucode on APUs */
846 	if (!(rdev->flags & RADEON_IS_IGP)) {
847 		ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
848 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
849 		if (err)
850 			goto out;
851 		if (rdev->mc_fw->datasize != mc_req_size) {
852 			printk(KERN_ERR
853 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
854 			       rdev->mc_fw->datasize, fw_name);
855 			err = -EINVAL;
856 		}
857 	}
858 
859 out:
860 	if (err) {
861 		if (err != -EINVAL)
862 			printk(KERN_ERR
863 			       "cik_cp: Failed to load firmware \"%s\"\n",
864 			       fw_name);
865 		release_firmware(rdev->pfp_fw);
866 		rdev->pfp_fw = NULL;
867 		release_firmware(rdev->me_fw);
868 		rdev->me_fw = NULL;
869 		release_firmware(rdev->ce_fw);
870 		rdev->ce_fw = NULL;
871 		release_firmware(rdev->mec_fw);
872 		rdev->mec_fw = NULL;
873 		release_firmware(rdev->rlc_fw);
874 		rdev->rlc_fw = NULL;
875 		release_firmware(rdev->sdma_fw);
876 		rdev->sdma_fw = NULL;
877 		release_firmware(rdev->mc_fw);
878 		rdev->mc_fw = NULL;
879 	}
880 	return err;
881 }
882 
883 /*
884  * Core functions
885  */
886 /**
887  * cik_tiling_mode_table_init - init the hw tiling table
888  *
889  * @rdev: radeon_device pointer
890  *
891  * Starting with SI, the tiling setup is done globally in a
892  * set of 32 tiling modes.  Rather than selecting each set of
893  * parameters per surface as on older asics, we just select
894  * which index in the tiling table we want to use, and the
895  * surface uses those parameters (CIK).
896  */
897 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
898 {
899 	const u32 num_tile_mode_states = 32;
900 	const u32 num_secondary_tile_mode_states = 16;
901 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
902 	u32 num_pipe_configs;
903 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
904 		rdev->config.cik.max_shader_engines;
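	/*
	 * For 4-pipe asics, whether the tables below use the
	 * ADDR_SURF_P4_16x16 or ADDR_SURF_P4_8x16 pipe config for most
	 * modes depends on the total render backend count (num_rbs).
	 */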
905 
906 	switch (rdev->config.cik.mem_row_size_in_kb) {
907 	case 1:
908 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
909 		break;
910 	case 2:
911 	default:
912 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
913 		break;
914 	case 4:
915 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
916 		break;
917 	}
918 
919 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
920 	if (num_pipe_configs > 8)
921 		num_pipe_configs = 8; /* ??? */
922 
923 	if (num_pipe_configs == 8) {
924 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
925 			switch (reg_offset) {
926 			case 0:
927 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
928 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
929 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
930 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
931 				break;
932 			case 1:
933 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
934 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
935 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
936 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
937 				break;
938 			case 2:
939 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
940 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
941 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
942 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
943 				break;
944 			case 3:
945 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
946 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
947 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
948 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
949 				break;
950 			case 4:
951 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
952 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
953 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
954 						 TILE_SPLIT(split_equal_to_row_size));
955 				break;
956 			case 5:
957 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
958 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
959 				break;
960 			case 6:
961 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
962 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
963 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
964 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
965 				break;
966 			case 7:
967 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
968 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
969 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
970 						 TILE_SPLIT(split_equal_to_row_size));
971 				break;
972 			case 8:
973 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
974 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
975 				break;
976 			case 9:
977 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
978 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
979 				break;
980 			case 10:
981 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
982 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
983 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
984 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
985 				break;
986 			case 11:
987 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
988 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
989 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
990 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
991 				break;
992 			case 12:
993 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
994 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
995 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
996 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
997 				break;
998 			case 13:
999 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1000 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1001 				break;
1002 			case 14:
1003 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1004 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1005 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1006 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1007 				break;
1008 			case 16:
1009 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1010 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1011 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1012 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1013 				break;
1014 			case 17:
1015 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1016 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1017 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1018 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1019 				break;
1020 			case 27:
1021 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1022 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1023 				break;
1024 			case 28:
1025 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1026 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1027 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1028 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1029 				break;
1030 			case 29:
1031 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1032 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1033 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1034 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1035 				break;
1036 			case 30:
1037 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1038 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1039 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1040 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1041 				break;
1042 			default:
1043 				gb_tile_moden = 0;
1044 				break;
1045 			}
1046 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1047 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1048 		}
1049 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1050 			switch (reg_offset) {
1051 			case 0:
1052 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1053 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1054 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1055 						 NUM_BANKS(ADDR_SURF_16_BANK));
1056 				break;
1057 			case 1:
1058 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1059 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1060 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1061 						 NUM_BANKS(ADDR_SURF_16_BANK));
1062 				break;
1063 			case 2:
1064 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1065 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1066 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1067 						 NUM_BANKS(ADDR_SURF_16_BANK));
1068 				break;
1069 			case 3:
1070 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1071 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1072 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1073 						 NUM_BANKS(ADDR_SURF_16_BANK));
1074 				break;
1075 			case 4:
1076 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1077 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1078 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1079 						 NUM_BANKS(ADDR_SURF_8_BANK));
1080 				break;
1081 			case 5:
1082 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1083 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1084 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1085 						 NUM_BANKS(ADDR_SURF_4_BANK));
1086 				break;
1087 			case 6:
1088 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1089 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1090 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1091 						 NUM_BANKS(ADDR_SURF_2_BANK));
1092 				break;
1093 			case 8:
1094 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1095 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1096 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1097 						 NUM_BANKS(ADDR_SURF_16_BANK));
1098 				break;
1099 			case 9:
1100 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1101 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1102 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1103 						 NUM_BANKS(ADDR_SURF_16_BANK));
1104 				break;
1105 			case 10:
1106 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1107 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1108 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1109 						 NUM_BANKS(ADDR_SURF_16_BANK));
1110 				break;
1111 			case 11:
1112 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1113 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1114 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1115 						 NUM_BANKS(ADDR_SURF_16_BANK));
1116 				break;
1117 			case 12:
1118 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1119 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1120 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1121 						 NUM_BANKS(ADDR_SURF_8_BANK));
1122 				break;
1123 			case 13:
1124 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1125 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1126 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1127 						 NUM_BANKS(ADDR_SURF_4_BANK));
1128 				break;
1129 			case 14:
1130 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1131 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1132 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1133 						 NUM_BANKS(ADDR_SURF_2_BANK));
1134 				break;
1135 			default:
1136 				gb_tile_moden = 0;
1137 				break;
1138 			}
1139 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1140 		}
1141 	} else if (num_pipe_configs == 4) {
1142 		if (num_rbs == 4) {
1143 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1144 				switch (reg_offset) {
1145 				case 0:
1146 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1147 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1148 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1149 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1150 					break;
1151 				case 1:
1152 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1153 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1154 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1155 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1156 					break;
1157 				case 2:
1158 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1159 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1160 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1161 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1162 					break;
1163 				case 3:
1164 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1165 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1166 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1167 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1168 					break;
1169 				case 4:
1170 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1171 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1172 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1173 							 TILE_SPLIT(split_equal_to_row_size));
1174 					break;
1175 				case 5:
1176 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1177 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1178 					break;
1179 				case 6:
1180 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1181 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1182 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1183 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1184 					break;
1185 				case 7:
1186 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1187 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1188 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1189 							 TILE_SPLIT(split_equal_to_row_size));
1190 					break;
1191 				case 8:
1192 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1193 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1194 					break;
1195 				case 9:
1196 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1197 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1198 					break;
1199 				case 10:
1200 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1201 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1202 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1203 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1204 					break;
1205 				case 11:
1206 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1207 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1208 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1209 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1210 					break;
1211 				case 12:
1212 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1213 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1214 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1215 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1216 					break;
1217 				case 13:
1218 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1219 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1220 					break;
1221 				case 14:
1222 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1223 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1224 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1225 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1226 					break;
1227 				case 16:
1228 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1229 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1230 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1231 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1232 					break;
1233 				case 17:
1234 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1235 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1236 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1237 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1238 					break;
1239 				case 27:
1240 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1241 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1242 					break;
1243 				case 28:
1244 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1245 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1246 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1247 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1248 					break;
1249 				case 29:
1250 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1251 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1252 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1253 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1254 					break;
1255 				case 30:
1256 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1257 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1258 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1259 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1260 					break;
1261 				default:
1262 					gb_tile_moden = 0;
1263 					break;
1264 				}
1265 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1266 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1267 			}
1268 		} else if (num_rbs < 4) {
1269 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1270 				switch (reg_offset) {
1271 				case 0:
1272 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1273 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1274 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1275 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1276 					break;
1277 				case 1:
1278 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1279 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1280 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1281 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1282 					break;
1283 				case 2:
1284 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1285 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1286 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1287 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1288 					break;
1289 				case 3:
1290 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1291 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1292 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1293 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1294 					break;
1295 				case 4:
1296 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1297 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1298 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1299 							 TILE_SPLIT(split_equal_to_row_size));
1300 					break;
1301 				case 5:
1302 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1303 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1304 					break;
1305 				case 6:
1306 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1307 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1308 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1309 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1310 					break;
1311 				case 7:
1312 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1313 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1314 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1315 							 TILE_SPLIT(split_equal_to_row_size));
1316 					break;
1317 				case 8:
1318 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1319 							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
1320 					break;
1321 				case 9:
1322 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1323 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1324 					break;
1325 				case 10:
1326 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1327 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1328 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1329 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1330 					break;
1331 				case 11:
1332 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1333 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1334 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1335 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1336 					break;
1337 				case 12:
1338 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1339 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1340 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1341 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1342 					break;
1343 				case 13:
1344 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1345 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1346 					break;
1347 				case 14:
1348 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1349 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1350 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1351 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1352 					break;
1353 				case 16:
1354 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1355 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1356 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1357 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1358 					break;
1359 				case 17:
1360 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1361 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1362 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1363 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1364 					break;
1365 				case 27:
1366 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1367 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1368 					break;
1369 				case 28:
1370 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1371 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1372 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1373 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1374 					break;
1375 				case 29:
1376 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1377 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1378 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1379 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1380 					break;
1381 				case 30:
1382 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1383 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1384 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1385 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1386 					break;
1387 				default:
1388 					gb_tile_moden = 0;
1389 					break;
1390 				}
1391 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1392 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1393 			}
1394 		}
1395 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1396 			switch (reg_offset) {
1397 			case 0:
1398 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1399 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1400 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1401 						 NUM_BANKS(ADDR_SURF_16_BANK));
1402 				break;
1403 			case 1:
1404 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1405 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1406 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1407 						 NUM_BANKS(ADDR_SURF_16_BANK));
1408 				break;
1409 			case 2:
1410 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1411 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1412 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1413 						 NUM_BANKS(ADDR_SURF_16_BANK));
1414 				break;
1415 			case 3:
1416 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1417 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1418 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1419 						 NUM_BANKS(ADDR_SURF_16_BANK));
1420 				break;
1421 			case 4:
1422 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1423 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1424 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1425 						 NUM_BANKS(ADDR_SURF_16_BANK));
1426 				break;
1427 			case 5:
1428 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1429 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1430 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1431 						 NUM_BANKS(ADDR_SURF_8_BANK));
1432 				break;
1433 			case 6:
1434 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1435 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1436 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1437 						 NUM_BANKS(ADDR_SURF_4_BANK));
1438 				break;
1439 			case 8:
1440 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1441 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1442 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1443 						 NUM_BANKS(ADDR_SURF_16_BANK));
1444 				break;
1445 			case 9:
1446 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1447 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1448 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1449 						 NUM_BANKS(ADDR_SURF_16_BANK));
1450 				break;
1451 			case 10:
1452 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1453 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1454 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1455 						 NUM_BANKS(ADDR_SURF_16_BANK));
1456 				break;
1457 			case 11:
1458 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1459 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1460 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1461 						 NUM_BANKS(ADDR_SURF_16_BANK));
1462 				break;
1463 			case 12:
1464 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1465 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1466 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1467 						 NUM_BANKS(ADDR_SURF_16_BANK));
1468 				break;
1469 			case 13:
1470 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1471 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1472 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1473 						 NUM_BANKS(ADDR_SURF_8_BANK));
1474 				break;
1475 			case 14:
1476 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1477 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1478 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1479 						 NUM_BANKS(ADDR_SURF_4_BANK));
1480 				break;
1481 			default:
1482 				gb_tile_moden = 0;
1483 				break;
1484 			}
1485 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1486 		}
1487 	} else if (num_pipe_configs == 2) {
1488 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1489 			switch (reg_offset) {
1490 			case 0:
1491 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1492 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1493 						 PIPE_CONFIG(ADDR_SURF_P2) |
1494 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1495 				break;
1496 			case 1:
1497 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1498 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1499 						 PIPE_CONFIG(ADDR_SURF_P2) |
1500 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1501 				break;
1502 			case 2:
1503 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1504 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1505 						 PIPE_CONFIG(ADDR_SURF_P2) |
1506 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1507 				break;
1508 			case 3:
1509 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1510 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1511 						 PIPE_CONFIG(ADDR_SURF_P2) |
1512 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1513 				break;
1514 			case 4:
1515 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1516 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1517 						 PIPE_CONFIG(ADDR_SURF_P2) |
1518 						 TILE_SPLIT(split_equal_to_row_size));
1519 				break;
1520 			case 5:
1521 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1522 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1523 				break;
1524 			case 6:
1525 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1526 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1527 						 PIPE_CONFIG(ADDR_SURF_P2) |
1528 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1529 				break;
1530 			case 7:
1531 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1532 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1533 						 PIPE_CONFIG(ADDR_SURF_P2) |
1534 						 TILE_SPLIT(split_equal_to_row_size));
1535 				break;
1536 			case 8:
1537 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1538 				break;
1539 			case 9:
1540 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1541 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1542 				break;
1543 			case 10:
1544 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1545 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1546 						 PIPE_CONFIG(ADDR_SURF_P2) |
1547 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1548 				break;
1549 			case 11:
1550 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1551 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1552 						 PIPE_CONFIG(ADDR_SURF_P2) |
1553 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1554 				break;
1555 			case 12:
1556 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1557 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1558 						 PIPE_CONFIG(ADDR_SURF_P2) |
1559 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1560 				break;
1561 			case 13:
1562 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1563 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1564 				break;
1565 			case 14:
1566 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1567 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1568 						 PIPE_CONFIG(ADDR_SURF_P2) |
1569 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1570 				break;
1571 			case 16:
1572 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1573 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1574 						 PIPE_CONFIG(ADDR_SURF_P2) |
1575 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1576 				break;
1577 			case 17:
1578 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1579 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1580 						 PIPE_CONFIG(ADDR_SURF_P2) |
1581 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1582 				break;
1583 			case 27:
1584 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1585 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1586 				break;
1587 			case 28:
1588 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1589 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1590 						 PIPE_CONFIG(ADDR_SURF_P2) |
1591 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1592 				break;
1593 			case 29:
1594 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1595 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1596 						 PIPE_CONFIG(ADDR_SURF_P2) |
1597 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1598 				break;
1599 			case 30:
1600 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1601 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1602 						 PIPE_CONFIG(ADDR_SURF_P2) |
1603 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1604 				break;
1605 			default:
1606 				gb_tile_moden = 0;
1607 				break;
1608 			}
1609 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1610 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1611 		}
1612 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1613 			switch (reg_offset) {
1614 			case 0:
1615 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1616 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1617 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1618 						 NUM_BANKS(ADDR_SURF_16_BANK));
1619 				break;
1620 			case 1:
1621 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1622 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1623 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1624 						 NUM_BANKS(ADDR_SURF_16_BANK));
1625 				break;
1626 			case 2:
1627 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1628 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1629 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1630 						 NUM_BANKS(ADDR_SURF_16_BANK));
1631 				break;
1632 			case 3:
1633 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1634 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1635 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1636 						 NUM_BANKS(ADDR_SURF_16_BANK));
1637 				break;
1638 			case 4:
1639 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1640 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1641 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1642 						 NUM_BANKS(ADDR_SURF_16_BANK));
1643 				break;
1644 			case 5:
1645 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1646 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1647 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1648 						 NUM_BANKS(ADDR_SURF_16_BANK));
1649 				break;
1650 			case 6:
1651 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1652 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1653 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1654 						 NUM_BANKS(ADDR_SURF_8_BANK));
1655 				break;
1656 			case 8:
1657 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1658 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1659 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1660 						 NUM_BANKS(ADDR_SURF_16_BANK));
1661 				break;
1662 			case 9:
1663 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1664 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1665 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1666 						 NUM_BANKS(ADDR_SURF_16_BANK));
1667 				break;
1668 			case 10:
1669 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1670 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1671 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1672 						 NUM_BANKS(ADDR_SURF_16_BANK));
1673 				break;
1674 			case 11:
1675 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1676 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1677 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1678 						 NUM_BANKS(ADDR_SURF_16_BANK));
1679 				break;
1680 			case 12:
1681 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1682 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1683 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1684 						 NUM_BANKS(ADDR_SURF_16_BANK));
1685 				break;
1686 			case 13:
1687 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1688 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1689 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1690 						 NUM_BANKS(ADDR_SURF_16_BANK));
1691 				break;
1692 			case 14:
1693 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1694 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1695 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1696 						 NUM_BANKS(ADDR_SURF_8_BANK));
1697 				break;
1698 			default:
1699 				gb_tile_moden = 0;
1700 				break;
1701 			}
1702 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1703 		}
1704 	} else
1705 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1706 }
1707 
1708 /**
1709  * cik_select_se_sh - select which SE, SH to address
1710  *
1711  * @rdev: radeon_device pointer
1712  * @se_num: shader engine to address
1713  * @sh_num: sh block to address
1714  *
1715  * Select which SE, SH combinations to address. Certain
1716  * registers are instanced per SE or SH.  0xffffffff means
1717  * broadcast to all SEs or SHs (CIK).
1718  */
1719 static void cik_select_se_sh(struct radeon_device *rdev,
1720 			     u32 se_num, u32 sh_num)
1721 {
1722 	u32 data = INSTANCE_BROADCAST_WRITES;
1723 
1724 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1725 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1726 	else if (se_num == 0xffffffff)
1727 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1728 	else if (sh_num == 0xffffffff)
1729 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1730 	else
1731 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1732 	WREG32(GRBM_GFX_INDEX, data);
1733 }
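
/*
 * Example (illustrative only, not part of the driver flow): with the
 * broadcast semantics above,
 *
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 *
 * targets every SE/SH pair for subsequent instanced register accesses,
 * while
 *
 *	cik_select_se_sh(rdev, 1, 0);
 *
 * restricts them to SE1/SH0.  The callers below always restore broadcast
 * mode once they are done iterating.
 */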
1734 
1735 /**
1736  * cik_create_bitmask - create a bitmask
1737  *
1738  * @bit_width: length of the mask
1739  *
1740  * create a variable length bit mask (CIK).
1741  * Returns the bitmask.
1742  */
1743 static u32 cik_create_bitmask(u32 bit_width)
1744 {
1745 	u32 i, mask = 0;
1746 
1747 	for (i = 0; i < bit_width; i++) {
1748 		mask <<= 1;
1749 		mask |= 1;
1750 	}
1751 	return mask;
1752 }
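
/*
 * Note (illustrative only): for bit_width < 32 the loop above produces the
 * same value as the usual mask expression, e.g.
 *
 *	cik_create_bitmask(4) == ((1u << 4) - 1) == 0xf
 *
 * and it additionally yields 0xffffffff for bit_width == 32, where a plain
 * 32-bit shift would be undefined.
 */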
1753 
1754 /**
1755  * cik_get_rb_disabled - calculate the bitmask of disabled RBs
1756  *
1757  * @rdev: radeon_device pointer
1758  * @max_rb_num: max RBs (render backends) for the asic
1759  * @se_num: number of SEs (shader engines) for the asic
1760  * @sh_per_se: number of SH blocks per SE for the asic
1761  *
1762  * Calculates the bitmask of disabled RBs (CIK).
1763  * Returns the disabled RB bitmask.
1764  */
1765 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1766 			      u32 max_rb_num, u32 se_num,
1767 			      u32 sh_per_se)
1768 {
1769 	u32 data, mask;
1770 
1771 	data = RREG32(CC_RB_BACKEND_DISABLE);
1772 	if (data & 1)
1773 		data &= BACKEND_DISABLE_MASK;
1774 	else
1775 		data = 0;
1776 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1777 
1778 	data >>= BACKEND_DISABLE_SHIFT;
1779 
1780 	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1781 
1782 	return data & mask;
1783 }
1784 
1785 /**
1786  * cik_setup_rb - setup the RBs on the asic
1787  *
1788  * @rdev: radeon_device pointer
1789  * @se_num: number of SEs (shader engines) for the asic
1790  * @sh_per_se: number of SH blocks per SE for the asic
1791  * @max_rb_num: max RBs (render backends) for the asic
1792  *
1793  * Configures per-SE/SH RB registers (CIK).
1794  */
1795 static void cik_setup_rb(struct radeon_device *rdev,
1796 			 u32 se_num, u32 sh_per_se,
1797 			 u32 max_rb_num)
1798 {
1799 	int i, j;
1800 	u32 data, mask;
1801 	u32 disabled_rbs = 0;
1802 	u32 enabled_rbs = 0;
1803 
1804 	for (i = 0; i < se_num; i++) {
1805 		for (j = 0; j < sh_per_se; j++) {
1806 			cik_select_se_sh(rdev, i, j);
1807 			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1808 			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1809 		}
1810 	}
1811 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1812 
1813 	mask = 1;
1814 	for (i = 0; i < max_rb_num; i++) {
1815 		if (!(disabled_rbs & mask))
1816 			enabled_rbs |= mask;
1817 		mask <<= 1;
1818 	}
1819 
1820 	for (i = 0; i < se_num; i++) {
1821 		cik_select_se_sh(rdev, i, 0xffffffff);
1822 		data = 0;
1823 		for (j = 0; j < sh_per_se; j++) {
1824 			switch (enabled_rbs & 3) {
1825 			case 1:
1826 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1827 				break;
1828 			case 2:
1829 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1830 				break;
1831 			case 3:
1832 			default:
1833 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1834 				break;
1835 			}
1836 			enabled_rbs >>= 2;
1837 		}
1838 		WREG32(PA_SC_RASTER_CONFIG, data);
1839 	}
1840 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1841 }
1842 
1843 /**
1844  * cik_gpu_init - setup the 3D engine
1845  *
1846  * @rdev: radeon_device pointer
1847  *
1848  * Configures the 3D engine and tiling configuration
1849  * registers so that the 3D engine is usable.
1850  */
1851 static __unused void cik_gpu_init(struct radeon_device *rdev)
1852 {
1853 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1854 	u32 mc_shared_chmap, mc_arb_ramcfg;
1855 	u32 hdp_host_path_cntl;
1856 	u32 tmp;
1857 	int i, j;
1858 
1859 	switch (rdev->family) {
1860 	case CHIP_BONAIRE:
1861 		rdev->config.cik.max_shader_engines = 2;
1862 		rdev->config.cik.max_tile_pipes = 4;
1863 		rdev->config.cik.max_cu_per_sh = 7;
1864 		rdev->config.cik.max_sh_per_se = 1;
1865 		rdev->config.cik.max_backends_per_se = 2;
1866 		rdev->config.cik.max_texture_channel_caches = 4;
1867 		rdev->config.cik.max_gprs = 256;
1868 		rdev->config.cik.max_gs_threads = 32;
1869 		rdev->config.cik.max_hw_contexts = 8;
1870 
1871 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1872 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1873 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1874 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1875 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1876 		break;
1877 	case CHIP_KAVERI:
1878 		/* TODO */
1879 		break;
1880 	case CHIP_KABINI:
1881 	default:
1882 		rdev->config.cik.max_shader_engines = 1;
1883 		rdev->config.cik.max_tile_pipes = 2;
1884 		rdev->config.cik.max_cu_per_sh = 2;
1885 		rdev->config.cik.max_sh_per_se = 1;
1886 		rdev->config.cik.max_backends_per_se = 1;
1887 		rdev->config.cik.max_texture_channel_caches = 2;
1888 		rdev->config.cik.max_gprs = 256;
1889 		rdev->config.cik.max_gs_threads = 16;
1890 		rdev->config.cik.max_hw_contexts = 8;
1891 
1892 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1893 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1894 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1895 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1896 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1897 		break;
1898 	}
1899 
1900 	/* Initialize HDP */
1901 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1902 		WREG32((0x2c14 + j), 0x00000000);
1903 		WREG32((0x2c18 + j), 0x00000000);
1904 		WREG32((0x2c1c + j), 0x00000000);
1905 		WREG32((0x2c20 + j), 0x00000000);
1906 		WREG32((0x2c24 + j), 0x00000000);
1907 	}
1908 
1909 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1910 
1911 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1912 
1913 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1914 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1915 
1916 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1917 	rdev->config.cik.mem_max_burst_length_bytes = 256;
1918 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1919 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1920 	if (rdev->config.cik.mem_row_size_in_kb > 4)
1921 		rdev->config.cik.mem_row_size_in_kb = 4;
1922 	/* XXX use MC settings? */
1923 	rdev->config.cik.shader_engine_tile_size = 32;
1924 	rdev->config.cik.num_gpus = 1;
1925 	rdev->config.cik.multi_gpu_tile_size = 64;
1926 
1927 	/* fix up row size */
1928 	gb_addr_config &= ~ROW_SIZE_MASK;
1929 	switch (rdev->config.cik.mem_row_size_in_kb) {
1930 	case 1:
1931 	default:
1932 		gb_addr_config |= ROW_SIZE(0);
1933 		break;
1934 	case 2:
1935 		gb_addr_config |= ROW_SIZE(1);
1936 		break;
1937 	case 4:
1938 		gb_addr_config |= ROW_SIZE(2);
1939 		break;
1940 	}
1941 
1942 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
1943 	 * not have bank info, so create a custom tiling dword.
1944 	 * bits 3:0   num_pipes
1945 	 * bits 7:4   num_banks
1946 	 * bits 11:8  group_size
1947 	 * bits 15:12 row_size
1948 	 */
1949 	rdev->config.cik.tile_config = 0;
1950 	switch (rdev->config.cik.num_tile_pipes) {
1951 	case 1:
1952 		rdev->config.cik.tile_config |= (0 << 0);
1953 		break;
1954 	case 2:
1955 		rdev->config.cik.tile_config |= (1 << 0);
1956 		break;
1957 	case 4:
1958 		rdev->config.cik.tile_config |= (2 << 0);
1959 		break;
1960 	case 8:
1961 	default:
1962 		/* XXX what about 12? */
1963 		rdev->config.cik.tile_config |= (3 << 0);
1964 		break;
1965 	}
1966 	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1967 		rdev->config.cik.tile_config |= 1 << 4;
1968 	else
1969 		rdev->config.cik.tile_config |= 0 << 4;
1970 	rdev->config.cik.tile_config |=
1971 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1972 	rdev->config.cik.tile_config |=
1973 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1974 
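	/*
	 * Decoding sketch (illustrative only): with the bit layout described
	 * above, a consumer of tile_config would unpack it as
	 *
	 *	num_pipes_enc = tile_config & 0xf;		(bits 3:0)
	 *	num_banks_enc = (tile_config >> 4) & 0xf;	(bits 7:4)
	 *	group_size    = (tile_config >> 8) & 0xf;	(bits 11:8)
	 *	row_size      = (tile_config >> 12) & 0xf;	(bits 15:12)
	 *
	 * The pipe and bank fields hold the encoded values assembled above,
	 * not raw counts.
	 */
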
1975 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
1976 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1977 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
1978 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1979 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1980 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1981 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1982 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1983 
1984 	cik_tiling_mode_table_init(rdev);
1985 
1986 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1987 		     rdev->config.cik.max_sh_per_se,
1988 		     rdev->config.cik.max_backends_per_se);
1989 
1990 	/* set HW defaults for 3D engine */
1991 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1992 
1993 	WREG32(SX_DEBUG_1, 0x20);
1994 
1995 	WREG32(TA_CNTL_AUX, 0x00010000);
1996 
1997 	tmp = RREG32(SPI_CONFIG_CNTL);
1998 	tmp |= 0x03000000;
1999 	WREG32(SPI_CONFIG_CNTL, tmp);
2000 
2001 	WREG32(SQ_CONFIG, 1);
2002 
2003 	WREG32(DB_DEBUG, 0);
2004 
2005 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2006 	tmp |= 0x00000400;
2007 	WREG32(DB_DEBUG2, tmp);
2008 
2009 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2010 	tmp |= 0x00020200;
2011 	WREG32(DB_DEBUG3, tmp);
2012 
2013 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2014 	tmp |= 0x00018208;
2015 	WREG32(CB_HW_CONTROL, tmp);
2016 
2017 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2018 
2019 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2020 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2021 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2022 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2023 
2024 	WREG32(VGT_NUM_INSTANCES, 1);
2025 
2026 	WREG32(CP_PERFMON_CNTL, 0);
2027 
2028 	WREG32(SQ_CONFIG, 0);
2029 
2030 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2031 					  FORCE_EOV_MAX_REZ_CNT(255)));
2032 
2033 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2034 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
2035 
2036 	WREG32(VGT_GS_VERTEX_REUSE, 16);
2037 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2038 
2039 	tmp = RREG32(HDP_MISC_CNTL);
2040 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2041 	WREG32(HDP_MISC_CNTL, tmp);
2042 
2043 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2044 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2045 
2046 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2047 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2048 
2049 	udelay(50);
2050 }
2051 
2052 /*
2053  * GPU scratch register helper functions.
2054  */
2055 /**
2056  * cik_scratch_init - setup driver info for CP scratch regs
2057  *
2058  * @rdev: radeon_device pointer
2059  *
2060  * Set up the number and offset of the CP scratch registers.
2061  * NOTE: use of CP scratch registers is a legacy interface and
2062  * is not used by default on newer asics (r6xx+).  On newer asics,
2063  * memory buffers are used for fences rather than scratch regs.
2064  */
2065 static __unused void cik_scratch_init(struct radeon_device *rdev)
2066 {
2067 	int i;
2068 
2069 	rdev->scratch.num_reg = 7;
2070 	rdev->scratch.reg_base = SCRATCH_REG0;
2071 	for (i = 0; i < rdev->scratch.num_reg; i++) {
2072 		rdev->scratch.free[i] = true;
2073 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2074 	}
2075 }
2076 
2077 /**
2078  * cik_ring_test - basic gfx ring test
2079  *
2080  * @rdev: radeon_device pointer
2081  * @ring: radeon_ring structure holding ring information
2082  *
2083  * Allocate a scratch register and write to it using the gfx ring (CIK).
2084  * Provides a basic gfx ring test to verify that the ring is working.
2085  * Used by cik_cp_gfx_resume().
2086  * Returns 0 on success, error on failure.
2087  */
2088 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2089 {
2090 	uint32_t scratch;
2091 	uint32_t tmp = 0;
2092 	unsigned i;
2093 	int r;
2094 
2095 	r = radeon_scratch_get(rdev, &scratch);
2096 	if (r) {
2097 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2098 		return r;
2099 	}
2100 	WREG32(scratch, 0xCAFEDEAD);
2101 	r = radeon_ring_lock(rdev, ring, 3);
2102 	if (r) {
2103 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2104 		radeon_scratch_free(rdev, scratch);
2105 		return r;
2106 	}
2107 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2108 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2109 	radeon_ring_write(ring, 0xDEADBEEF);
2110 	radeon_ring_unlock_commit(rdev, ring);
2111 
2112 	for (i = 0; i < rdev->usec_timeout; i++) {
2113 		tmp = RREG32(scratch);
2114 		if (tmp == 0xDEADBEEF)
2115 			break;
2116 		DRM_UDELAY(1);
2117 	}
2118 	if (i < rdev->usec_timeout) {
2119 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2120 	} else {
2121 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2122 			  ring->idx, scratch, tmp);
2123 		r = -EINVAL;
2124 	}
2125 	radeon_scratch_free(rdev, scratch);
2126 	return r;
2127 }
2128 
2129 /**
2130  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2131  *
2132  * @rdev: radeon_device pointer
2133  * @fence: radeon fence object
2134  *
2135  * Emits a fence sequence number on the gfx ring and flushes
2136  * GPU caches.
2137  */
2138 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2139 			     struct radeon_fence *fence)
2140 {
2141 	struct radeon_ring *ring = &rdev->ring[fence->ring];
2142 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2143 
2144 	/* EVENT_WRITE_EOP - flush caches, send int */
2145 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2146 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2147 				 EOP_TC_ACTION_EN |
2148 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2149 				 EVENT_INDEX(5)));
2150 	radeon_ring_write(ring, addr & 0xfffffffc);
2151 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2152 	radeon_ring_write(ring, fence->seq);
2153 	radeon_ring_write(ring, 0);
2154 	/* HDP flush */
2155 	/* We should be using the new WAIT_REG_MEM special op packet here
2156 	 * but it causes the CP to hang
2157 	 */
2158 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2159 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2160 				 WRITE_DATA_DST_SEL(0)));
2161 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2162 	radeon_ring_write(ring, 0);
2163 	radeon_ring_write(ring, 0);
2164 }
2165 
2166 /**
2167  * cik_fence_compute_ring_emit - emit a fence on the compute ring
2168  *
2169  * @rdev: radeon_device pointer
2170  * @fence: radeon fence object
2171  *
2172  * Emits a fence sequence number on the compute ring and flushes
2173  * GPU caches.
2174  */
2175 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2176 				 struct radeon_fence *fence)
2177 {
2178 	struct radeon_ring *ring = &rdev->ring[fence->ring];
2179 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2180 
2181 	/* RELEASE_MEM - flush caches, send int */
2182 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2183 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2184 				 EOP_TC_ACTION_EN |
2185 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2186 				 EVENT_INDEX(5)));
2187 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2188 	radeon_ring_write(ring, addr & 0xfffffffc);
2189 	radeon_ring_write(ring, upper_32_bits(addr));
2190 	radeon_ring_write(ring, fence->seq);
2191 	radeon_ring_write(ring, 0);
2192 	/* HDP flush */
2193 	/* We should be using the new WAIT_REG_MEM special op packet here
2194 	 * but it causes the CP to hang
2195 	 */
2196 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2197 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2198 				 WRITE_DATA_DST_SEL(0)));
2199 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2200 	radeon_ring_write(ring, 0);
2201 	radeon_ring_write(ring, 0);
2202 }
2203 
2204 void cik_semaphore_ring_emit(struct radeon_device *rdev,
2205 			     struct radeon_ring *ring,
2206 			     struct radeon_semaphore *semaphore,
2207 			     bool emit_wait)
2208 {
2209 	uint64_t addr = semaphore->gpu_addr;
2210 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2211 
2212 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2213 	radeon_ring_write(ring, addr & 0xffffffff);
2214 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2215 }
2216 
2217 /*
2218  * IB stuff
2219  */
2220 /**
2221  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2222  *
2223  * @rdev: radeon_device pointer
2224  * @ib: radeon indirect buffer object
2225  *
2226  * Emits a DE (drawing engine) or CE (constant engine) IB
2227  * on the gfx ring.  IBs are usually generated by userspace
2228  * acceleration drivers and submitted to the kernel for
2229  * scheduling on the ring.  This function schedules the IB
2230  * on the gfx ring for execution by the GPU.
2231  */
2232 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2233 {
2234 	struct radeon_ring *ring = &rdev->ring[ib->ring];
2235 	u32 header, control = INDIRECT_BUFFER_VALID;
2236 
2237 	if (ib->is_const_ib) {
2238 		/* set switch buffer packet before const IB */
2239 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2240 		radeon_ring_write(ring, 0);
2241 
2242 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2243 	} else {
2244 		u32 next_rptr;
2245 		if (ring->rptr_save_reg) {
2246 			next_rptr = ring->wptr + 3 + 4;
2247 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2248 			radeon_ring_write(ring, ((ring->rptr_save_reg -
2249 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
2250 			radeon_ring_write(ring, next_rptr);
2251 		} else if (rdev->wb.enabled) {
2252 			next_rptr = ring->wptr + 5 + 4;
2253 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2254 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2255 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2256 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2257 			radeon_ring_write(ring, next_rptr);
2258 		}
2259 
2260 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2261 	}
2262 
2263 	control |= ib->length_dw |
2264 		(ib->vm ? (ib->vm->id << 24) : 0);
2265 
2266 	radeon_ring_write(ring, header);
2267 	radeon_ring_write(ring,
2268 #ifdef __BIG_ENDIAN
2269 			  (2 << 0) |
2270 #endif
2271 			  (ib->gpu_addr & 0xFFFFFFFC));
2272 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2273 	radeon_ring_write(ring, control);
2274 }
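
/*
 * Illustrative only: the indirect-buffer packet emitted above is four
 * dwords long:
 *
 *	PACKET3(PACKET3_INDIRECT_BUFFER[_CONST], 2)
 *	ib->gpu_addr, low bits (dword aligned)
 *	ib->gpu_addr, high 16 bits
 *	INDIRECT_BUFFER_VALID | length in dwords | VMID << 24
 *
 * so the 3 dword test IB used by cik_ib_test() below, running in VMID 0,
 * ends up with a control dword of INDIRECT_BUFFER_VALID | 3.
 */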
2275 
2276 /**
2277  * cik_ib_test - basic gfx ring IB test
2278  *
2279  * @rdev: radeon_device pointer
2280  * @ring: radeon_ring structure holding ring information
2281  *
2282  * Allocate an IB and execute it on the gfx ring (CIK).
2283  * Provides a basic gfx ring test to verify that IBs are working.
2284  * Returns 0 on success, error on failure.
2285  */
2286 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2287 {
2288 	struct radeon_ib ib;
2289 	uint32_t scratch;
2290 	uint32_t tmp = 0;
2291 	unsigned i;
2292 	int r;
2293 
2294 	r = radeon_scratch_get(rdev, &scratch);
2295 	if (r) {
2296 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2297 		return r;
2298 	}
2299 	WREG32(scratch, 0xCAFEDEAD);
2300 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2301 	if (r) {
2302 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2303 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
2304 		return r;
2305 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2306 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2307 	ib.ptr[2] = 0xDEADBEEF;
2308 	ib.length_dw = 3;
2309 	r = radeon_ib_schedule(rdev, &ib, NULL);
2310 	if (r) {
2311 		radeon_scratch_free(rdev, scratch);
2312 		radeon_ib_free(rdev, &ib);
2313 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2314 		return r;
2315 	}
2316 	r = radeon_fence_wait(ib.fence, false);
2317 	if (r) {
2318 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
2319 		return r;
2320 	}
2321 	for (i = 0; i < rdev->usec_timeout; i++) {
2322 		tmp = RREG32(scratch);
2323 		if (tmp == 0xDEADBEEF)
2324 			break;
2325 		DRM_UDELAY(1);
2326 	}
2327 	if (i < rdev->usec_timeout) {
2328 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2329 	} else {
2330 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2331 			  scratch, tmp);
2332 		r = -EINVAL;
2333 	}
2334 	radeon_scratch_free(rdev, scratch);
2335 	radeon_ib_free(rdev, &ib);
2336 	return r;
2337 }
2338 
2339 /*
2340  * CP.
2341  * On CIK, gfx and compute now have independent command processors.
2342  *
2343  * GFX
2344  * Gfx consists of a single ring and can process both gfx jobs and
2345  * compute jobs.  The gfx CP consists of three microengines (ME):
2346  * PFP - Pre-Fetch Parser
2347  * ME - Micro Engine
2348  * CE - Constant Engine
2349  * The PFP and ME make up what is considered the Drawing Engine (DE).
2350  * The CE is an asynchronous engine used for updating buffer descriptors
2351  * used by the DE so that they can be loaded into cache in parallel
2352  * while the DE is processing state update packets.
2353  *
2354  * Compute
2355  * The compute CP consists of two microengines (ME):
2356  * MEC1 - Compute MicroEngine 1
2357  * MEC2 - Compute MicroEngine 2
2358  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2359  * The queues are exposed to userspace and are programmed directly
2360  * by the compute runtime.
2361  */
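/*
 * PM4 note (illustrative only): the command submission below is built from
 * type-3 packets.  PACKET3(op, n) forms the packet header and is followed
 * by n + 1 data dwords; the ring test above, for example, emits
 *
 *	PACKET3(PACKET3_SET_UCONFIG_REG, 1)
 *	(scratch - PACKET3_SET_UCONFIG_REG_START) >> 2
 *	0xDEADBEEF
 *
 * i.e. a header plus two data dwords.  The header bit layout itself comes
 * from the PACKET3() macro in the register headers.
 */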
2362 /**
2363  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2364  *
2365  * @rdev: radeon_device pointer
2366  * @enable: enable or disable the MEs
2367  *
2368  * Halts or unhalts the gfx MEs.
2369  */
2370 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2371 {
2372 	if (enable)
2373 		WREG32(CP_ME_CNTL, 0);
2374 	else {
2375 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2376 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2377 	}
2378 	udelay(50);
2379 }
2380 
2381 /**
2382  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2383  *
2384  * @rdev: radeon_device pointer
2385  *
2386  * Loads the gfx PFP, ME, and CE ucode.
2387  * Returns 0 for success, -EINVAL if the ucode is not available.
2388  */
2389 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2390 {
2391 	const __be32 *fw_data;
2392 	int i;
2393 
2394 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2395 		return -EINVAL;
2396 
2397 	cik_cp_gfx_enable(rdev, false);
2398 
2399 	/* PFP */
2400 	fw_data = (const __be32 *)rdev->pfp_fw->data;
2401 	WREG32(CP_PFP_UCODE_ADDR, 0);
2402 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2403 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2404 	WREG32(CP_PFP_UCODE_ADDR, 0);
2405 
2406 	/* CE */
2407 	fw_data = (const __be32 *)rdev->ce_fw->data;
2408 	WREG32(CP_CE_UCODE_ADDR, 0);
2409 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2410 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2411 	WREG32(CP_CE_UCODE_ADDR, 0);
2412 
2413 	/* ME */
2414 	fw_data = (const __be32 *)rdev->me_fw->data;
2415 	WREG32(CP_ME_RAM_WADDR, 0);
2416 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2417 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2418 	WREG32(CP_ME_RAM_WADDR, 0);
2419 
2420 	WREG32(CP_PFP_UCODE_ADDR, 0);
2421 	WREG32(CP_CE_UCODE_ADDR, 0);
2422 	WREG32(CP_ME_RAM_WADDR, 0);
2423 	WREG32(CP_ME_RAM_RADDR, 0);
2424 	return 0;
2425 }
2426 
2427 /**
2428  * cik_cp_gfx_start - start the gfx ring
2429  *
2430  * @rdev: radeon_device pointer
2431  *
2432  * Enables the ring and loads the clear state context and other
2433  * packets required to init the ring.
2434  * Returns 0 for success, error for failure.
2435  */
2436 static int cik_cp_gfx_start(struct radeon_device *rdev)
2437 {
2438 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2439 	int r, i;
2440 
2441 	/* init the CP */
2442 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2443 	WREG32(CP_ENDIAN_SWAP, 0);
2444 	WREG32(CP_DEVICE_ID, 1);
2445 
2446 	cik_cp_gfx_enable(rdev, true);
2447 
2448 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2449 	if (r) {
2450 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2451 		return r;
2452 	}
2453 
2454 	/* init the CE partitions.  CE only used for gfx on CIK */
2455 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2456 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2457 	radeon_ring_write(ring, 0xc000);
2458 	radeon_ring_write(ring, 0xc000);
2459 
2460 	/* setup clear context state */
2461 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2462 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2463 
2464 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2465 	radeon_ring_write(ring, 0x80000000);
2466 	radeon_ring_write(ring, 0x80000000);
2467 
2468 	for (i = 0; i < cik_default_size; i++)
2469 		radeon_ring_write(ring, cik_default_state[i]);
2470 
2471 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2472 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2473 
2474 	/* set clear context state */
2475 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2476 	radeon_ring_write(ring, 0);
2477 
2478 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2479 	radeon_ring_write(ring, 0x00000316);
2480 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2481 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2482 
2483 	radeon_ring_unlock_commit(rdev, ring);
2484 
2485 	return 0;
2486 }
2487 
2488 /**
2489  * cik_cp_gfx_fini - stop the gfx ring
2490  *
2491  * @rdev: radeon_device pointer
2492  *
2493  * Stop the gfx ring and tear down the driver ring
2494  * info.
2495  */
2496 static void cik_cp_gfx_fini(struct radeon_device *rdev)
2497 {
2498 	cik_cp_gfx_enable(rdev, false);
2499 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2500 }
2501 
2502 /**
2503  * cik_cp_gfx_resume - setup the gfx ring buffer registers
2504  *
2505  * @rdev: radeon_device pointer
2506  *
2507  * Program the location and size of the gfx ring buffer
2508  * and test it to make sure it's working.
2509  * Returns 0 for success, error for failure.
2510  */
2511 static int cik_cp_gfx_resume(struct radeon_device *rdev)
2512 {
2513 	struct radeon_ring *ring;
2514 	u32 tmp;
2515 	u32 rb_bufsz;
2516 	u64 rb_addr;
2517 	int r;
2518 
2519 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
2520 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2521 
2522 	/* Set the write pointer delay */
2523 	WREG32(CP_RB_WPTR_DELAY, 0);
2524 
2525 	/* set the RB to use vmid 0 */
2526 	WREG32(CP_RB_VMID, 0);
2527 
2528 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2529 
2530 	/* ring 0 - compute and gfx */
2531 	/* Set ring buffer size */
2532 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2533 	rb_bufsz = drm_order(ring->ring_size / 8);
2534 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2535 #ifdef __BIG_ENDIAN
2536 	tmp |= BUF_SWAP_32BIT;
2537 #endif
2538 	WREG32(CP_RB0_CNTL, tmp);
2539 
2540 	/* Initialize the ring buffer's read and write pointers */
2541 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2542 	ring->wptr = 0;
2543 	WREG32(CP_RB0_WPTR, ring->wptr);
2544 
2545 	/* set the wb address whether it's enabled or not */
2546 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2547 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2548 
2549 	/* scratch register shadowing is no longer supported */
2550 	WREG32(SCRATCH_UMSK, 0);
2551 
2552 	if (!rdev->wb.enabled)
2553 		tmp |= RB_NO_UPDATE;
2554 
2555 	mdelay(1);
2556 	WREG32(CP_RB0_CNTL, tmp);
2557 
2558 	rb_addr = ring->gpu_addr >> 8;
2559 	WREG32(CP_RB0_BASE, rb_addr);
2560 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2561 
2562 	ring->rptr = RREG32(CP_RB0_RPTR);
2563 
2564 	/* start the ring */
2565 	cik_cp_gfx_start(rdev);
2566 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2567 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2568 	if (r) {
2569 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2570 		return r;
2571 	}
2572 	return 0;
2573 }
2574 
2575 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2576 			      struct radeon_ring *ring)
2577 {
2578 	u32 rptr;
2579 
2582 	if (rdev->wb.enabled) {
2583 		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2584 	} else {
2585 		spin_lock(&rdev->srbm_mutex);
2586 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2587 		rptr = RREG32(CP_HQD_PQ_RPTR);
2588 		cik_srbm_select(rdev, 0, 0, 0, 0);
2589 		spin_unlock(&rdev->srbm_mutex);
2590 	}
2591 	rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2592 
2593 	return rptr;
2594 }
2595 
2596 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2597 			      struct radeon_ring *ring)
2598 {
2599 	u32 wptr;
2600 
2601 	if (rdev->wb.enabled) {
2602 		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2603 	} else {
2604 		spin_lock(&rdev->srbm_mutex);
2605 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2606 		wptr = RREG32(CP_HQD_PQ_WPTR);
2607 		cik_srbm_select(rdev, 0, 0, 0, 0);
2608 		spin_unlock(&rdev->srbm_mutex);
2609 	}
2610 	wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2611 
2612 	return wptr;
2613 }
2614 
2615 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2616 			       struct radeon_ring *ring)
2617 {
2618 	u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2619 
2620 	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2621 	WDOORBELL32(ring->doorbell_offset, wptr);
2622 }
2623 
2624 /**
2625  * cik_cp_compute_enable - enable/disable the compute CP MEs
2626  *
2627  * @rdev: radeon_device pointer
2628  * @enable: enable or disable the MEs
2629  *
2630  * Halts or unhalts the compute MEs.
2631  */
2632 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2633 {
2634 	if (enable)
2635 		WREG32(CP_MEC_CNTL, 0);
2636 	else
2637 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2638 	udelay(50);
2639 }
2640 
2641 /**
2642  * cik_cp_compute_load_microcode - load the compute CP ME ucode
2643  *
2644  * @rdev: radeon_device pointer
2645  *
2646  * Loads the compute MEC1&2 ucode.
2647  * Returns 0 for success, -EINVAL if the ucode is not available.
2648  */
2649 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2650 {
2651 	const __be32 *fw_data;
2652 	int i;
2653 
2654 	if (!rdev->mec_fw)
2655 		return -EINVAL;
2656 
2657 	cik_cp_compute_enable(rdev, false);
2658 
2659 	/* MEC1 */
2660 	fw_data = (const __be32 *)rdev->mec_fw->data;
2661 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2662 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2663 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2664 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2665 
2666 	if (rdev->family == CHIP_KAVERI) {
2667 		/* MEC2 */
2668 		fw_data = (const __be32 *)rdev->mec_fw->data;
2669 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2670 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2671 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2672 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2673 	}
2674 
2675 	return 0;
2676 }
2677 
2678 /**
2679  * cik_cp_compute_start - start the compute queues
2680  *
2681  * @rdev: radeon_device pointer
2682  *
2683  * Enable the compute queues.
2684  * Returns 0 for success, error for failure.
2685  */
2686 static int cik_cp_compute_start(struct radeon_device *rdev)
2687 {
2688 	cik_cp_compute_enable(rdev, true);
2689 
2690 	return 0;
2691 }
2692 
2693 /**
2694  * cik_cp_compute_fini - stop the compute queues
2695  *
2696  * @rdev: radeon_device pointer
2697  *
2698  * Stop the compute queues and tear down the driver queue
2699  * info.
2700  */
2701 static void cik_cp_compute_fini(struct radeon_device *rdev)
2702 {
2703 	int i, idx, r;
2704 
2705 	cik_cp_compute_enable(rdev, false);
2706 
2707 	for (i = 0; i < 2; i++) {
2708 		if (i == 0)
2709 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
2710 		else
2711 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
2712 
2713 		if (rdev->ring[idx].mqd_obj) {
2714 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2715 			if (unlikely(r != 0))
2716 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2717 
2718 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2719 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2720 
2721 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2722 			rdev->ring[idx].mqd_obj = NULL;
2723 		}
2724 	}
2725 }
2726 
2727 static void cik_mec_fini(struct radeon_device *rdev)
2728 {
2729 	int r;
2730 
2731 	if (rdev->mec.hpd_eop_obj) {
2732 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2733 		if (unlikely(r != 0))
2734 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2735 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2736 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2737 
2738 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2739 		rdev->mec.hpd_eop_obj = NULL;
2740 	}
2741 }
2742 
2743 #define MEC_HPD_SIZE 2048
2744 
2745 static int cik_mec_init(struct radeon_device *rdev)
2746 {
2747 	int r;
2748 	u32 *hpd;
2749 
2750 	/*
2751 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2752 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2753 	 */
2754 	if (rdev->family == CHIP_KAVERI)
2755 		rdev->mec.num_mec = 2;
2756 	else
2757 		rdev->mec.num_mec = 1;
2758 	rdev->mec.num_pipe = 4;
2759 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2760 
2761 	if (rdev->mec.hpd_eop_obj == NULL) {
2762 		r = radeon_bo_create(rdev,
2763 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2764 				     PAGE_SIZE, true,
2765 				     RADEON_GEM_DOMAIN_GTT, NULL,
2766 				     &rdev->mec.hpd_eop_obj);
2767 		if (r) {
2768 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
2769 			return r;
2770 		}
2771 	}
2772 
2773 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2774 	if (unlikely(r != 0)) {
2775 		cik_mec_fini(rdev);
2776 		return r;
2777 	}
2778 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2779 			  &rdev->mec.hpd_eop_gpu_addr);
2780 	if (r) {
2781 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
2782 		cik_mec_fini(rdev);
2783 		return r;
2784 	}
2785 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2786 	if (r) {
2787 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
2788 		cik_mec_fini(rdev);
2789 		return r;
2790 	}
2791 
2792 	/* clear memory.  Not sure if this is required or not */
2793 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2794 
2795 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2796 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2797 
2798 	return 0;
2799 }
2800 
2801 struct hqd_registers
2802 {
2803 	u32 cp_mqd_base_addr;
2804 	u32 cp_mqd_base_addr_hi;
2805 	u32 cp_hqd_active;
2806 	u32 cp_hqd_vmid;
2807 	u32 cp_hqd_persistent_state;
2808 	u32 cp_hqd_pipe_priority;
2809 	u32 cp_hqd_queue_priority;
2810 	u32 cp_hqd_quantum;
2811 	u32 cp_hqd_pq_base;
2812 	u32 cp_hqd_pq_base_hi;
2813 	u32 cp_hqd_pq_rptr;
2814 	u32 cp_hqd_pq_rptr_report_addr;
2815 	u32 cp_hqd_pq_rptr_report_addr_hi;
2816 	u32 cp_hqd_pq_wptr_poll_addr;
2817 	u32 cp_hqd_pq_wptr_poll_addr_hi;
2818 	u32 cp_hqd_pq_doorbell_control;
2819 	u32 cp_hqd_pq_wptr;
2820 	u32 cp_hqd_pq_control;
2821 	u32 cp_hqd_ib_base_addr;
2822 	u32 cp_hqd_ib_base_addr_hi;
2823 	u32 cp_hqd_ib_rptr;
2824 	u32 cp_hqd_ib_control;
2825 	u32 cp_hqd_iq_timer;
2826 	u32 cp_hqd_iq_rptr;
2827 	u32 cp_hqd_dequeue_request;
2828 	u32 cp_hqd_dma_offload;
2829 	u32 cp_hqd_sema_cmd;
2830 	u32 cp_hqd_msg_type;
2831 	u32 cp_hqd_atomic0_preop_lo;
2832 	u32 cp_hqd_atomic0_preop_hi;
2833 	u32 cp_hqd_atomic1_preop_lo;
2834 	u32 cp_hqd_atomic1_preop_hi;
2835 	u32 cp_hqd_hq_scheduler0;
2836 	u32 cp_hqd_hq_scheduler1;
2837 	u32 cp_mqd_control;
2838 };
2839 
2840 struct bonaire_mqd
2841 {
2842 	u32 header;
2843 	u32 dispatch_initiator;
2844 	u32 dimensions[3];
2845 	u32 start_idx[3];
2846 	u32 num_threads[3];
2847 	u32 pipeline_stat_enable;
2848 	u32 perf_counter_enable;
2849 	u32 pgm[2];
2850 	u32 tba[2];
2851 	u32 tma[2];
2852 	u32 pgm_rsrc[2];
2853 	u32 vmid;
2854 	u32 resource_limits;
2855 	u32 static_thread_mgmt01[2];
2856 	u32 tmp_ring_size;
2857 	u32 static_thread_mgmt23[2];
2858 	u32 restart[3];
2859 	u32 thread_trace_enable;
2860 	u32 reserved1;
2861 	u32 user_data[16];
2862 	u32 vgtcs_invoke_count[2];
2863 	struct hqd_registers queue_state;
2864 	u32 dequeue_cntr;
2865 	u32 interrupt_queue[64];
2866 };
2867 
2868 /**
2869  * cik_cp_compute_resume - setup the compute queue registers
2870  *
2871  * @rdev: radeon_device pointer
2872  *
2873  * Program the compute queues and test them to make sure they
2874  * are working.
2875  * Returns 0 for success, error for failure.
2876  */
2877 static int cik_cp_compute_resume(struct radeon_device *rdev)
2878 {
2879 	int r, i, idx;
2880 	int r, i, j, idx;
2881 	bool use_doorbell = true;
2882 	u64 hqd_gpu_addr;
2883 	u64 mqd_gpu_addr;
2884 	u64 eop_gpu_addr;
2885 	u64 wb_gpu_addr;
2886 	u32 *buf;
2887 	struct bonaire_mqd *mqd;
2888 
2889 	r = cik_cp_compute_start(rdev);
2890 	if (r)
2891 		return r;
2892 
2893 	/* fix up chicken bits */
2894 	tmp = RREG32(CP_CPF_DEBUG);
2895 	tmp |= (1 << 23);
2896 	WREG32(CP_CPF_DEBUG, tmp);
2897 
2898 	/* init the pipes */
2899 	spin_lock(&rdev->srbm_mutex);
2900 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2901 		int me = (i < 4) ? 1 : 2;
2902 		int pipe = (i < 4) ? i : (i - 4);
2903 
2904 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2905 
2906 		cik_srbm_select(rdev, me, pipe, 0, 0);
2907 
2908 		/* write the EOP addr */
2909 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2910 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2911 
2912 		/* set the VMID assigned */
2913 		WREG32(CP_HPD_EOP_VMID, 0);
2914 
2915 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2916 		tmp = RREG32(CP_HPD_EOP_CONTROL);
2917 		tmp &= ~EOP_SIZE_MASK;
2918 		tmp |= drm_order(MEC_HPD_SIZE / 8);
2919 		WREG32(CP_HPD_EOP_CONTROL, tmp);
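		/*
		 * Worked example (illustrative only): MEC_HPD_SIZE is 2048
		 * bytes, so MEC_HPD_SIZE / 8 = 256 and drm_order(256) = 8.
		 * With the field interpreted as 2^(EOP_SIZE + 1) dwords,
		 * this programs 2^9 = 512 dwords = 2048 bytes, i.e. exactly
		 * MEC_HPD_SIZE.
		 */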
2920 	}
2921 	cik_srbm_select(rdev, 0, 0, 0, 0);
2922 	spin_unlock(&rdev->srbm_mutex);
2923 
2924 	/* init the queues.  Just two for now. */
2925 	for (i = 0; i < 2; i++) {
2926 		if (i == 0)
2927 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
2928 		else
2929 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
2930 
2931 		if (rdev->ring[idx].mqd_obj == NULL) {
2932 			r = radeon_bo_create(rdev,
2933 					     sizeof(struct bonaire_mqd),
2934 					     PAGE_SIZE, true,
2935 					     RADEON_GEM_DOMAIN_GTT, NULL,
2936 					     &rdev->ring[idx].mqd_obj);
2937 			if (r) {
2938 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2939 				return r;
2940 			}
2941 		}
2942 
2943 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2944 		if (unlikely(r != 0)) {
2945 			cik_cp_compute_fini(rdev);
2946 			return r;
2947 		}
2948 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2949 				  &mqd_gpu_addr);
2950 		if (r) {
2951 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2952 			cik_cp_compute_fini(rdev);
2953 			return r;
2954 		}
2955 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2956 		if (r) {
2957 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2958 			cik_cp_compute_fini(rdev);
2959 			return r;
2960 		}
2961 
2962 		/* doorbell offset */
2963 		rdev->ring[idx].doorbell_offset =
2964 			(rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2965 
2966 		/* init the mqd struct */
2967 		memset(buf, 0, sizeof(struct bonaire_mqd));
2968 
2969 		mqd = (struct bonaire_mqd *)buf;
2970 		mqd->header = 0xC0310800;
2971 		mqd->static_thread_mgmt01[0] = 0xffffffff;
2972 		mqd->static_thread_mgmt01[1] = 0xffffffff;
2973 		mqd->static_thread_mgmt23[0] = 0xffffffff;
2974 		mqd->static_thread_mgmt23[1] = 0xffffffff;
2975 
2976 		spin_lock(&rdev->srbm_mutex);
2977 		cik_srbm_select(rdev, rdev->ring[idx].me,
2978 				rdev->ring[idx].pipe,
2979 				rdev->ring[idx].queue, 0);
2980 
2981 		/* disable wptr polling */
2982 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2983 		tmp &= ~WPTR_POLL_EN;
2984 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2985 
2986 		/* enable doorbell? */
2987 		mqd->queue_state.cp_hqd_pq_doorbell_control =
2988 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2989 		if (use_doorbell)
2990 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2991 		else
2992 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2993 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2994 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
2995 
2996 		/* disable the queue if it's active */
2997 		mqd->queue_state.cp_hqd_dequeue_request = 0;
2998 		mqd->queue_state.cp_hqd_pq_rptr = 0;
2999 		mqd->queue_state.cp_hqd_pq_wptr = 0;
3000 		if (RREG32(CP_HQD_ACTIVE) & 1) {
3001 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3002 			for (j = 0; j < rdev->usec_timeout; j++) {
3003 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
3004 					break;
3005 				udelay(1);
3006 			}
3007 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3008 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3009 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3010 		}
3011 
3012 		/* set the pointer to the MQD */
3013 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3014 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3015 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3016 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3017 		/* set MQD vmid to 0 */
3018 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3019 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3020 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3021 
3022 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3023 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3024 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3025 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3026 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3027 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3028 
3029 		/* set up the HQD, this is similar to CP_RB0_CNTL */
3030 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3031 		mqd->queue_state.cp_hqd_pq_control &=
3032 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3033 
3034 		mqd->queue_state.cp_hqd_pq_control |=
3035 			drm_order(rdev->ring[idx].ring_size / 8);
3036 		mqd->queue_state.cp_hqd_pq_control |=
3037 			(drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3038 #ifdef __BIG_ENDIAN
3039 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3040 #endif
3041 		mqd->queue_state.cp_hqd_pq_control &=
3042 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3043 		mqd->queue_state.cp_hqd_pq_control |=
3044 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3045 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3046 
3047 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3048 		if (i == 0)
3049 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3050 		else
3051 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3052 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3053 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3054 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3055 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3056 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3057 
3058 		/* set the wb address whether it's enabled or not */
3059 		if (i == 0)
3060 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3061 		else
3062 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3063 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3064 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3065 			upper_32_bits(wb_gpu_addr) & 0xffff;
3066 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3067 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3068 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3069 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3070 
3071 		/* enable the doorbell if requested */
3072 		if (use_doorbell) {
3073 			mqd->queue_state.cp_hqd_pq_doorbell_control =
3074 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3075 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3076 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
3077 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3078 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3079 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
3080 				~(DOORBELL_SOURCE | DOORBELL_HIT);
3081 
3082 		} else {
3083 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3084 		}
3085 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3086 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3087 
3088 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3089 		rdev->ring[idx].wptr = 0;
3090 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3091 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3092 		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3093 		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3094 
3095 		/* set the vmid for the queue */
3096 		mqd->queue_state.cp_hqd_vmid = 0;
3097 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3098 
3099 		/* activate the queue */
3100 		mqd->queue_state.cp_hqd_active = 1;
3101 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3102 
3103 		cik_srbm_select(rdev, 0, 0, 0, 0);
3104 		spin_unlock(&rdev->srbm_mutex);
3105 
3106 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3107 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3108 
3109 		rdev->ring[idx].ready = true;
3110 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3111 		if (r)
3112 			rdev->ring[idx].ready = false;
3113 	}
3114 
3115 	return 0;
3116 }
3117 
3118 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3119 {
3120 	cik_cp_gfx_enable(rdev, enable);
3121 	cik_cp_compute_enable(rdev, enable);
3122 }
3123 
3124 static int cik_cp_load_microcode(struct radeon_device *rdev)
3125 {
3126 	int r;
3127 
3128 	r = cik_cp_gfx_load_microcode(rdev);
3129 	if (r)
3130 		return r;
3131 	r = cik_cp_compute_load_microcode(rdev);
3132 	if (r)
3133 		return r;
3134 
3135 	return 0;
3136 }
3137 
3138 static void cik_cp_fini(struct radeon_device *rdev)
3139 {
3140 	cik_cp_gfx_fini(rdev);
3141 	cik_cp_compute_fini(rdev);
3142 }
3143 
3144 static int cik_cp_resume(struct radeon_device *rdev)
3145 {
3146 	int r;
3147 
3148 	/* Reset all cp blocks */
3149 	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3150 	RREG32(GRBM_SOFT_RESET);
3151 	mdelay(15);
3152 	WREG32(GRBM_SOFT_RESET, 0);
3153 	RREG32(GRBM_SOFT_RESET);
3154 
3155 	r = cik_cp_load_microcode(rdev);
3156 	if (r)
3157 		return r;
3158 
3159 	r = cik_cp_gfx_resume(rdev);
3160 	if (r)
3161 		return r;
3162 	r = cik_cp_compute_resume(rdev);
3163 	if (r)
3164 		return r;
3165 
3166 	return 0;
3167 }
3168 
3169 /*
3170  * sDMA - System DMA
3171  * Starting with CIK, the GPU has new asynchronous
3172  * DMA engines.  These engines are used for compute
3173  * and gfx.  There are two DMA engines (SDMA0, SDMA1)
3174  * and each one supports 1 ring buffer used for gfx
3175  * and 2 queues used for compute.
3176  *
3177  * The programming model is very similar to the CP
3178  * (ring buffer, IBs, etc.), but sDMA has its own
3179  * packet format that is different from the PM4 format
3180  * used by the CP. sDMA supports copying data, writing
3181  * embedded data, solid fills, and a number of other
3182  * things.  It also has support for tiling/detiling of
3183  * buffers.
3184  */
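/*
 * Packet sketch (illustrative only): an sDMA packet starts with an
 * SDMA_PACKET(opcode, sub_opcode, extra) header dword followed by an
 * opcode-dependent payload.  The write-back update in
 * cik_sdma_ring_ib_execute() below, for example, is emitted as
 *
 *	SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)
 *	destination address, low 32 bits
 *	destination address, high 32 bits
 *	1	(number of data dwords that follow)
 *	next_rptr
 *
 * i.e. a five dword linear write of a single value.
 */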
3185 /**
3186  * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3187  *
3188  * @rdev: radeon_device pointer
3189  * @ib: IB object to schedule
3190  *
3191  * Schedule an IB in the DMA ring (CIK).
3192  */
3193 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3194 			      struct radeon_ib *ib)
3195 {
3196 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3197 	u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3198 
3199 	if (rdev->wb.enabled) {
3200 		u32 next_rptr = ring->wptr + 5;
3201 		while ((next_rptr & 7) != 4)
3202 			next_rptr++;
3203 		next_rptr += 4;
3204 		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3205 		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3206 		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3207 		radeon_ring_write(ring, 1); /* number of DWs to follow */
3208 		radeon_ring_write(ring, next_rptr);
3209 	}
3210 
3211 	/* the 4 DW INDIRECT_BUFFER packet below must end on an 8 DW boundary, hence the NOP padding to wptr % 8 == 4 */
3212 	while ((ring->wptr & 7) != 4)
3213 		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3214 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3215 	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3216 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3217 	radeon_ring_write(ring, ib->length_dw);
3219 }
3220 
3221 /**
3222  * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3223  *
3224  * @rdev: radeon_device pointer
3225  * @fence: radeon fence object
3226  *
3227  * Add a DMA fence packet to the ring to write
3228  * the fence seq number and DMA trap packet to generate
3229  * an interrupt if needed (CIK).
3230  */
3231 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3232 			      struct radeon_fence *fence)
3233 {
3234 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3235 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3236 	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3237 			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3238 	u32 ref_and_mask;
3239 
3240 	if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3241 		ref_and_mask = SDMA0;
3242 	else
3243 		ref_and_mask = SDMA1;
3244 
3245 	/* write the fence */
3246 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3247 	radeon_ring_write(ring, addr & 0xffffffff);
3248 	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3249 	radeon_ring_write(ring, fence->seq);
3250 	/* generate an interrupt */
3251 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3252 	/* flush HDP */
3253 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3254 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3255 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3256 	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3257 	radeon_ring_write(ring, ref_and_mask); /* MASK */
3258 	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3259 }
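
/*
 * The POLL_REG_MEM packet above is used here as an HDP flush: as
 * emitted, it is read as "write this engine's bit to GPU_HDP_FLUSH_REQ,
 * then poll GPU_HDP_FLUSH_DONE until (value & MASK) == REFERENCE",
 * retrying up to RETRY_COUNT times with the given POLL_INTERVAL.
 */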
3260 
3261 /**
3262  * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3263  *
3264  * @rdev: radeon_device pointer
3265  * @ring: radeon_ring structure holding ring information
3266  * @semaphore: radeon semaphore object
3267  * @emit_wait: wait or signal semaphore
3268  *
3269  * Add a DMA semaphore packet to the ring to wait on or signal
3270  * other rings (CIK).
3271  */
3272 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3273 				  struct radeon_ring *ring,
3274 				  struct radeon_semaphore *semaphore,
3275 				  bool emit_wait)
3276 {
3277 	u64 addr = semaphore->gpu_addr;
3278 	u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3279 
3280 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3281 	radeon_ring_write(ring, addr & 0xfffffff8);
3282 	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3283 }
3284 
3285 /**
3286  * cik_sdma_gfx_stop - stop the gfx async dma engines
3287  *
3288  * @rdev: radeon_device pointer
3289  *
3290  * Stop the gfx async dma ring buffers (CIK).
3291  */
3292 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3293 {
3294 	u32 rb_cntl, reg_offset;
3295 	int i;
3296 
3297 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3298 
3299 	for (i = 0; i < 2; i++) {
3300 		if (i == 0)
3301 			reg_offset = SDMA0_REGISTER_OFFSET;
3302 		else
3303 			reg_offset = SDMA1_REGISTER_OFFSET;
3304 		rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3305 		rb_cntl &= ~SDMA_RB_ENABLE;
3306 		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3307 		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3308 	}
3309 }
3310 
3311 /**
3312  * cik_sdma_rlc_stop - stop the compute async dma engines
3313  *
3314  * @rdev: radeon_device pointer
3315  *
3316  * Stop the compute async dma queues (CIK).
3317  */
3318 static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3319 {
3320 	/* XXX todo */
3321 }
3322 
3323 /**
3324  * cik_sdma_enable - halt or unhalt the async dma engines
3325  *
3326  * @rdev: radeon_device pointer
3327  * @enable: enable/disable the DMA MEs.
3328  *
3329  * Halt or unhalt the async dma engines (CIK).
3330  */
3331 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3332 {
3333 	u32 me_cntl, reg_offset;
3334 	int i;
3335 
3336 	for (i = 0; i < 2; i++) {
3337 		if (i == 0)
3338 			reg_offset = SDMA0_REGISTER_OFFSET;
3339 		else
3340 			reg_offset = SDMA1_REGISTER_OFFSET;
3341 		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3342 		if (enable)
3343 			me_cntl &= ~SDMA_HALT;
3344 		else
3345 			me_cntl |= SDMA_HALT;
3346 		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3347 	}
3348 }
3349 
3350 /**
3351  * cik_sdma_gfx_resume - setup and start the async dma engines
3352  *
3353  * @rdev: radeon_device pointer
3354  *
3355  * Set up the gfx DMA ring buffers and enable them (CIK).
3356  * Returns 0 for success, error for failure.
3357  */
3358 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3359 {
3360 	struct radeon_ring *ring;
3361 	u32 rb_cntl, ib_cntl;
3362 	u32 rb_bufsz;
3363 	u32 reg_offset, wb_offset;
3364 	int i, r;
3365 
3366 	for (i = 0; i < 2; i++) {
3367 		if (i == 0) {
3368 			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3369 			reg_offset = SDMA0_REGISTER_OFFSET;
3370 			wb_offset = R600_WB_DMA_RPTR_OFFSET;
3371 		} else {
3372 			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3373 			reg_offset = SDMA1_REGISTER_OFFSET;
3374 			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3375 		}
3376 
3377 		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3378 		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3379 
3380 		/* Set ring buffer size in dwords */
3381 		rb_bufsz = drm_order(ring->ring_size / 4);
3382 		rb_cntl = rb_bufsz << 1;
3383 #ifdef __BIG_ENDIAN
3384 		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3385 #endif
3386 		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3387 
3388 		/* Initialize the ring buffer's read and write pointers */
3389 		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3390 		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3391 
3392 		/* set the wb address whether it's enabled or not */
3393 		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3394 		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3395 		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3396 		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3397 
3398 		if (rdev->wb.enabled)
3399 			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3400 
3401 		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3402 		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3403 
3404 		ring->wptr = 0;
3405 		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3406 
3407 		ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3408 
3409 		/* enable DMA RB */
3410 		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3411 
3412 		ib_cntl = SDMA_IB_ENABLE;
3413 #ifdef __BIG_ENDIAN
3414 		ib_cntl |= SDMA_IB_SWAP_ENABLE;
3415 #endif
3416 		/* enable DMA IBs */
3417 		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3418 
3419 		ring->ready = true;
3420 
3421 		r = radeon_ring_test(rdev, ring->idx, ring);
3422 		if (r) {
3423 			ring->ready = false;
3424 			return r;
3425 		}
3426 	}
3427 
3428 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3429 
3430 	return 0;
3431 }
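
/*
 * Worked example for the RB_CNTL programming above (assuming drm_order()
 * returns the base-2 log of its argument): a 256KB gfx ring is 65536
 * dwords, so rb_bufsz = drm_order(65536) = 16 and the size field written
 * to SDMA0_GFX_RB_CNTL is 16 << 1 = 0x20.
 */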
3432 
3433 /**
3434  * cik_sdma_rlc_resume - setup and start the async dma engines
3435  *
3436  * @rdev: radeon_device pointer
3437  *
3438  * Set up the compute DMA queues and enable them (CIK).
3439  * Returns 0 for success, error for failure.
3440  */
3441 static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3442 {
3443 	/* XXX todo */
3444 	return 0;
3445 }
3446 
3447 /**
3448  * cik_sdma_load_microcode - load the sDMA ME ucode
3449  *
3450  * @rdev: radeon_device pointer
3451  *
3452  * Loads the sDMA0/1 ucode.
3453  * Returns 0 for success, -EINVAL if the ucode is not available.
3454  */
3455 static int cik_sdma_load_microcode(struct radeon_device *rdev)
3456 {
3457 	const __be32 *fw_data;
3458 	int i;
3459 
3460 	if (!rdev->sdma_fw)
3461 		return -EINVAL;
3462 
3463 	/* stop the gfx rings and rlc compute queues */
3464 	cik_sdma_gfx_stop(rdev);
3465 	cik_sdma_rlc_stop(rdev);
3466 
3467 	/* halt the MEs */
3468 	cik_sdma_enable(rdev, false);
3469 
3470 	/* sdma0 */
3471 	fw_data = (const __be32 *)rdev->sdma_fw->data;
3472 	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3473 	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3474 		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3475 	WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3476 
3477 	/* sdma1 */
3478 	fw_data = (const __be32 *)rdev->sdma_fw->data;
3479 	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3480 	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3481 		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3482 	WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3483 
3484 	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3485 	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3486 	return 0;
3487 }
3488 
3489 /**
3490  * cik_sdma_resume - setup and start the async dma engines
3491  *
3492  * @rdev: radeon_device pointer
3493  *
3494  * Set up the DMA engines and enable them (CIK).
3495  * Returns 0 for success, error for failure.
3496  */
3497 static __unused int cik_sdma_resume(struct radeon_device *rdev)
3498 {
3499 	int r;
3500 
3501 	/* Reset dma */
3502 	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3503 	RREG32(SRBM_SOFT_RESET);
3504 	udelay(50);
3505 	WREG32(SRBM_SOFT_RESET, 0);
3506 	RREG32(SRBM_SOFT_RESET);
3507 
3508 	r = cik_sdma_load_microcode(rdev);
3509 	if (r)
3510 		return r;
3511 
3512 	/* unhalt the MEs */
3513 	cik_sdma_enable(rdev, true);
3514 
3515 	/* start the gfx rings and rlc compute queues */
3516 	r = cik_sdma_gfx_resume(rdev);
3517 	if (r)
3518 		return r;
3519 	r = cik_sdma_rlc_resume(rdev);
3520 	if (r)
3521 		return r;
3522 
3523 	return 0;
3524 }
3525 
3526 /**
3527  * cik_sdma_fini - tear down the async dma engines
3528  *
3529  * @rdev: radeon_device pointer
3530  *
3531  * Stop the async dma engines and free the rings (CIK).
3532  */
3533 static __unused void cik_sdma_fini(struct radeon_device *rdev)
3534 {
3535 	/* stop the gfx rings and rlc compute queues */
3536 	cik_sdma_gfx_stop(rdev);
3537 	cik_sdma_rlc_stop(rdev);
3538 	/* halt the MEs */
3539 	cik_sdma_enable(rdev, false);
3540 	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3541 	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3542 	/* XXX - compute dma queue tear down */
3543 }
3544 
3545 /**
3546  * cik_copy_dma - copy pages using the DMA engine
3547  *
3548  * @rdev: radeon_device pointer
3549  * @src_offset: src GPU address
3550  * @dst_offset: dst GPU address
3551  * @num_gpu_pages: number of GPU pages to xfer
3552  * @fence: radeon fence object
3553  *
3554  * Copy GPU pages using the DMA engine (CIK).
3555  * Used by the radeon ttm implementation to move pages if
3556  * registered as the asic copy callback.
3557  */
3558 int cik_copy_dma(struct radeon_device *rdev,
3559 		 uint64_t src_offset, uint64_t dst_offset,
3560 		 unsigned num_gpu_pages,
3561 		 struct radeon_fence **fence)
3562 {
3563 	struct radeon_semaphore *sem = NULL;
3564 	int ring_index = rdev->asic->copy.dma_ring_index;
3565 	struct radeon_ring *ring = &rdev->ring[ring_index];
3566 	u32 size_in_bytes, cur_size_in_bytes;
3567 	int i, num_loops;
3568 	int r = 0;
3569 
3570 	r = radeon_semaphore_create(rdev, &sem);
3571 	if (r) {
3572 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3573 		return r;
3574 	}
3575 
3576 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3577 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3578 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3579 	if (r) {
3580 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3581 		radeon_semaphore_free(rdev, &sem, NULL);
3582 		return r;
3583 	}
3584 
3585 	if (radeon_fence_need_sync(*fence, ring->idx)) {
3586 		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3587 					    ring->idx);
3588 		radeon_fence_note_sync(*fence, ring->idx);
3589 	} else {
3590 		radeon_semaphore_free(rdev, &sem, NULL);
3591 	}
3592 
3593 	for (i = 0; i < num_loops; i++) {
3594 		cur_size_in_bytes = size_in_bytes;
3595 		if (cur_size_in_bytes > 0x1fffff)
3596 			cur_size_in_bytes = 0x1fffff;
3597 		size_in_bytes -= cur_size_in_bytes;
3598 		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3599 		radeon_ring_write(ring, cur_size_in_bytes);
3600 		radeon_ring_write(ring, 0); /* src/dst endian swap */
3601 		radeon_ring_write(ring, src_offset & 0xffffffff);
3602 		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3603 		radeon_ring_write(ring, dst_offset & 0xfffffffc);
3604 		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3605 		src_offset += cur_size_in_bytes;
3606 		dst_offset += cur_size_in_bytes;
3607 	}
3608 
3609 	r = radeon_fence_emit(rdev, fence, ring->idx);
3610 	if (r) {
3611 		radeon_ring_unlock_undo(rdev, ring);
3612 		return r;
3613 	}
3614 
3615 	radeon_ring_unlock_commit(rdev, ring);
3616 	radeon_semaphore_free(rdev, &sem, *fence);
3617 
3618 	return r;
3619 }
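
/*
 * Chunking example for cik_copy_dma() (assuming 4KB GPU pages): copying
 * 4096 pages is 16MB, which is split into
 * DIV_ROUND_UP(0x1000000, 0x1fffff) = 9 COPY packets of 7 dwords each,
 * so the ring is locked for 9 * 7 + 14 = 77 dwords.
 */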
3620 
3621 /**
3622  * cik_sdma_ring_test - simple async dma engine test
3623  *
3624  * @rdev: radeon_device pointer
3625  * @ring: radeon_ring structure holding ring information
3626  *
3627  * Test the DMA engine by using it to write a value
3628  * to memory (CIK).
3629  * Returns 0 for success, error for failure.
3630  */
3631 int cik_sdma_ring_test(struct radeon_device *rdev,
3632 		       struct radeon_ring *ring)
3633 {
3634 	unsigned i;
3635 	int r;
3636 	volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr;
3637 	u32 tmp;
3638 
3639 	if (!ptr) {
3640 		DRM_ERROR("invalid vram scratch pointer\n");
3641 		return -EINVAL;
3642 	}
3643 
3644 	tmp = 0xCAFEDEAD;
3645 	writel(tmp, ptr);
3646 
3647 	r = radeon_ring_lock(rdev, ring, 4);
3648 	if (r) {
3649 		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3650 		return r;
3651 	}
3652 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3653 	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3654 	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3655 	radeon_ring_write(ring, 1); /* number of DWs to follow */
3656 	radeon_ring_write(ring, 0xDEADBEEF);
3657 	radeon_ring_unlock_commit(rdev, ring);
3658 
3659 	for (i = 0; i < rdev->usec_timeout; i++) {
3660 		tmp = readl(ptr);
3661 		if (tmp == 0xDEADBEEF)
3662 			break;
3663 		DRM_UDELAY(1);
3664 	}
3665 
3666 	if (i < rdev->usec_timeout) {
3667 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3668 	} else {
3669 		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3670 			  ring->idx, tmp);
3671 		r = -EINVAL;
3672 	}
3673 	return r;
3674 }
3675 
3676 /**
3677  * cik_sdma_ib_test - test an IB on the DMA engine
3678  *
3679  * @rdev: radeon_device pointer
3680  * @ring: radeon_ring structure holding ring information
3681  *
3682  * Test a simple IB in the DMA ring (CIK).
3683  * Returns 0 on success, error on failure.
3684  */
3685 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3686 {
3687 	struct radeon_ib ib;
3688 	unsigned i;
3689 	int r;
3690 	volatile void __iomem *ptr = (volatile void *)rdev->vram_scratch.ptr;
3691 	u32 tmp = 0;
3692 
3693 	if (!ptr) {
3694 		DRM_ERROR("invalid vram scratch pointer\n");
3695 		return -EINVAL;
3696 	}
3697 
3698 	tmp = 0xCAFEDEAD;
3699 	writel(tmp, ptr);
3700 
3701 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3702 	if (r) {
3703 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3704 		return r;
3705 	}
3706 
3707 	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3708 	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3709 	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3710 	ib.ptr[3] = 1;
3711 	ib.ptr[4] = 0xDEADBEEF;
3712 	ib.length_dw = 5;
3713 
3714 	r = radeon_ib_schedule(rdev, &ib, NULL);
3715 	if (r) {
3716 		radeon_ib_free(rdev, &ib);
3717 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3718 		return r;
3719 	}
3720 	r = radeon_fence_wait(ib.fence, false);
3721 	if (r) {
3722 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3723 		return r;
3724 	}
3725 	for (i = 0; i < rdev->usec_timeout; i++) {
3726 		tmp = readl(ptr);
3727 		if (tmp == 0xDEADBEEF)
3728 			break;
3729 		DRM_UDELAY(1);
3730 	}
3731 	if (i < rdev->usec_timeout) {
3732 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3733 	} else {
3734 		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3735 		r = -EINVAL;
3736 	}
3737 	radeon_ib_free(rdev, &ib);
3738 	return r;
3739 }
3740 
3741 
3742 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3743 {
3744 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
3745 		RREG32(GRBM_STATUS));
3746 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
3747 		RREG32(GRBM_STATUS2));
3748 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3749 		RREG32(GRBM_STATUS_SE0));
3750 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3751 		RREG32(GRBM_STATUS_SE1));
3752 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3753 		RREG32(GRBM_STATUS_SE2));
3754 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3755 		RREG32(GRBM_STATUS_SE3));
3756 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
3757 		RREG32(SRBM_STATUS));
3758 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
3759 		RREG32(SRBM_STATUS2));
3760 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
3761 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3762 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
3763 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
3764 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3765 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3766 		 RREG32(CP_STALLED_STAT1));
3767 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3768 		 RREG32(CP_STALLED_STAT2));
3769 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3770 		 RREG32(CP_STALLED_STAT3));
3771 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3772 		 RREG32(CP_CPF_BUSY_STAT));
3773 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3774 		 RREG32(CP_CPF_STALLED_STAT1));
3775 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3776 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3777 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3778 		 RREG32(CP_CPC_STALLED_STAT1));
3779 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
3780 }
3781 
3782 /**
3783  * cik_gpu_check_soft_reset - check which blocks are busy
3784  *
3785  * @rdev: radeon_device pointer
3786  *
3787  * Check which blocks are busy and return the relevant reset
3788  * mask to be used by cik_gpu_soft_reset().
3789  * Returns a mask of the blocks to be reset.
3790  */
3791 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3792 {
3793 	u32 reset_mask = 0;
3794 	u32 tmp;
3795 
3796 	/* GRBM_STATUS */
3797 	tmp = RREG32(GRBM_STATUS);
3798 	if (tmp & (PA_BUSY | SC_BUSY |
3799 		   BCI_BUSY | SX_BUSY |
3800 		   TA_BUSY | VGT_BUSY |
3801 		   DB_BUSY | CB_BUSY |
3802 		   GDS_BUSY | SPI_BUSY |
3803 		   IA_BUSY | IA_BUSY_NO_DMA))
3804 		reset_mask |= RADEON_RESET_GFX;
3805 
3806 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3807 		reset_mask |= RADEON_RESET_CP;
3808 
3809 	/* GRBM_STATUS2 */
3810 	tmp = RREG32(GRBM_STATUS2);
3811 	if (tmp & RLC_BUSY)
3812 		reset_mask |= RADEON_RESET_RLC;
3813 
3814 	/* SDMA0_STATUS_REG */
3815 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3816 	if (!(tmp & SDMA_IDLE))
3817 		reset_mask |= RADEON_RESET_DMA;
3818 
3819 	/* SDMA1_STATUS_REG */
3820 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3821 	if (!(tmp & SDMA_IDLE))
3822 		reset_mask |= RADEON_RESET_DMA1;
3823 
3824 	/* SRBM_STATUS2 */
3825 	tmp = RREG32(SRBM_STATUS2);
3826 	if (tmp & SDMA_BUSY)
3827 		reset_mask |= RADEON_RESET_DMA;
3828 
3829 	if (tmp & SDMA1_BUSY)
3830 		reset_mask |= RADEON_RESET_DMA1;
3831 
3832 	/* SRBM_STATUS */
3833 	tmp = RREG32(SRBM_STATUS);
3834 
3835 	if (tmp & IH_BUSY)
3836 		reset_mask |= RADEON_RESET_IH;
3837 
3838 	if (tmp & SEM_BUSY)
3839 		reset_mask |= RADEON_RESET_SEM;
3840 
3841 	if (tmp & GRBM_RQ_PENDING)
3842 		reset_mask |= RADEON_RESET_GRBM;
3843 
3844 	if (tmp & VMC_BUSY)
3845 		reset_mask |= RADEON_RESET_VMC;
3846 
3847 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3848 		   MCC_BUSY | MCD_BUSY))
3849 		reset_mask |= RADEON_RESET_MC;
3850 
3851 	if (evergreen_is_display_hung(rdev))
3852 		reset_mask |= RADEON_RESET_DISPLAY;
3853 
3854 	/* Skip MC reset as it's most likely not hung, just busy */
3855 	if (reset_mask & RADEON_RESET_MC) {
3856 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3857 		reset_mask &= ~RADEON_RESET_MC;
3858 	}
3859 
3860 	return reset_mask;
3861 }
3862 
3863 /**
3864  * cik_gpu_soft_reset - soft reset GPU
3865  *
3866  * @rdev: radeon_device pointer
3867  * @reset_mask: mask of which blocks to reset
3868  *
3869  * Soft reset the blocks specified in @reset_mask.
3870  */
3871 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3872 {
3873 	struct evergreen_mc_save save;
3874 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3875 	u32 tmp;
3876 
3877 	if (reset_mask == 0)
3878 		return;
3879 
3880 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3881 
3882 	cik_print_gpu_status_regs(rdev);
3883 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3884 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3885 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3886 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3887 
3888 	/* stop the rlc */
3889 	cik_rlc_stop(rdev);
3890 
3891 	/* Disable GFX parsing/prefetching */
3892 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3893 
3894 	/* Disable MEC parsing/prefetching */
3895 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3896 
3897 	if (reset_mask & RADEON_RESET_DMA) {
3898 		/* sdma0 */
3899 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3900 		tmp |= SDMA_HALT;
3901 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3902 	}
3903 	if (reset_mask & RADEON_RESET_DMA1) {
3904 		/* sdma1 */
3905 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3906 		tmp |= SDMA_HALT;
3907 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3908 	}
3909 
3910 	evergreen_mc_stop(rdev, &save);
3911 	if (evergreen_mc_wait_for_idle(rdev)) {
3912 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3913 	}
3914 
3915 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3916 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3917 
3918 	if (reset_mask & RADEON_RESET_CP) {
3919 		grbm_soft_reset |= SOFT_RESET_CP;
3920 
3921 		srbm_soft_reset |= SOFT_RESET_GRBM;
3922 	}
3923 
3924 	if (reset_mask & RADEON_RESET_DMA)
3925 		srbm_soft_reset |= SOFT_RESET_SDMA;
3926 
3927 	if (reset_mask & RADEON_RESET_DMA1)
3928 		srbm_soft_reset |= SOFT_RESET_SDMA1;
3929 
3930 	if (reset_mask & RADEON_RESET_DISPLAY)
3931 		srbm_soft_reset |= SOFT_RESET_DC;
3932 
3933 	if (reset_mask & RADEON_RESET_RLC)
3934 		grbm_soft_reset |= SOFT_RESET_RLC;
3935 
3936 	if (reset_mask & RADEON_RESET_SEM)
3937 		srbm_soft_reset |= SOFT_RESET_SEM;
3938 
3939 	if (reset_mask & RADEON_RESET_IH)
3940 		srbm_soft_reset |= SOFT_RESET_IH;
3941 
3942 	if (reset_mask & RADEON_RESET_GRBM)
3943 		srbm_soft_reset |= SOFT_RESET_GRBM;
3944 
3945 	if (reset_mask & RADEON_RESET_VMC)
3946 		srbm_soft_reset |= SOFT_RESET_VMC;
3947 
3948 	if (!(rdev->flags & RADEON_IS_IGP)) {
3949 		if (reset_mask & RADEON_RESET_MC)
3950 			srbm_soft_reset |= SOFT_RESET_MC;
3951 	}
3952 
3953 	if (grbm_soft_reset) {
3954 		tmp = RREG32(GRBM_SOFT_RESET);
3955 		tmp |= grbm_soft_reset;
3956 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3957 		WREG32(GRBM_SOFT_RESET, tmp);
3958 		tmp = RREG32(GRBM_SOFT_RESET);
3959 
3960 		udelay(50);
3961 
3962 		tmp &= ~grbm_soft_reset;
3963 		WREG32(GRBM_SOFT_RESET, tmp);
3964 		tmp = RREG32(GRBM_SOFT_RESET);
3965 	}
3966 
3967 	if (srbm_soft_reset) {
3968 		tmp = RREG32(SRBM_SOFT_RESET);
3969 		tmp |= srbm_soft_reset;
3970 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3971 		WREG32(SRBM_SOFT_RESET, tmp);
3972 		tmp = RREG32(SRBM_SOFT_RESET);
3973 
3974 		udelay(50);
3975 
3976 		tmp &= ~srbm_soft_reset;
3977 		WREG32(SRBM_SOFT_RESET, tmp);
3978 		tmp = RREG32(SRBM_SOFT_RESET);
3979 	}
3980 
3981 	/* Wait a little for things to settle down */
3982 	udelay(50);
3983 
3984 	evergreen_mc_resume(rdev, &save);
3985 	udelay(50);
3986 
3987 	cik_print_gpu_status_regs(rdev);
3988 }
3989 
3990 /**
3991  * cik_asic_reset - soft reset GPU
3992  *
3993  * @rdev: radeon_device pointer
3994  *
3995  * Look up which blocks are hung and attempt
3996  * to reset them.
3997  * Returns 0 for success.
3998  */
3999 int cik_asic_reset(struct radeon_device *rdev)
4000 {
4001 	u32 reset_mask;
4002 
4003 	reset_mask = cik_gpu_check_soft_reset(rdev);
4004 
4005 	if (reset_mask)
4006 		r600_set_bios_scratch_engine_hung(rdev, true);
4007 
4008 	cik_gpu_soft_reset(rdev, reset_mask);
4009 
4010 	reset_mask = cik_gpu_check_soft_reset(rdev);
4011 
4012 	if (!reset_mask)
4013 		r600_set_bios_scratch_engine_hung(rdev, false);
4014 
4015 	return 0;
4016 }
4017 
4018 /**
4019  * cik_gfx_is_lockup - check if the 3D engine is locked up
4020  *
4021  * @rdev: radeon_device pointer
4022  * @ring: radeon_ring structure holding ring information
4023  *
4024  * Check if the 3D engine is locked up (CIK).
4025  * Returns true if the engine is locked, false if not.
4026  */
4027 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4028 {
4029 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4030 
4031 	if (!(reset_mask & (RADEON_RESET_GFX |
4032 			    RADEON_RESET_COMPUTE |
4033 			    RADEON_RESET_CP))) {
4034 		radeon_ring_lockup_update(ring);
4035 		return false;
4036 	}
4037 	/* force CP activities */
4038 	radeon_ring_force_activity(rdev, ring);
4039 	return radeon_ring_test_lockup(rdev, ring);
4040 }
4041 
4042 /**
4043  * cik_sdma_is_lockup - Check if the DMA engine is locked up
4044  *
4045  * @rdev: radeon_device pointer
4046  * @ring: radeon_ring structure holding ring information
4047  *
4048  * Check if the async DMA engine is locked up (CIK).
4049  * Returns true if the engine appears to be locked up, false if not.
4050  */
4051 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4052 {
4053 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4054 	u32 mask;
4055 
4056 	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4057 		mask = RADEON_RESET_DMA;
4058 	else
4059 		mask = RADEON_RESET_DMA1;
4060 
4061 	if (!(reset_mask & mask)) {
4062 		radeon_ring_lockup_update(ring);
4063 		return false;
4064 	}
4065 	/* force ring activities */
4066 	radeon_ring_force_activity(rdev, ring);
4067 	return radeon_ring_test_lockup(rdev, ring);
4068 }
4069 
4070 /* MC */
4071 /**
4072  * cik_mc_program - program the GPU memory controller
4073  *
4074  * @rdev: radeon_device pointer
4075  *
4076  * Set the location of vram, gart, and AGP in the GPU's
4077  * physical address space (CIK).
4078  */
4079 static __unused void cik_mc_program(struct radeon_device *rdev)
4080 {
4081 	struct evergreen_mc_save save;
4082 	u32 tmp;
4083 	int i, j;
4084 
4085 	/* Initialize HDP */
4086 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4087 		WREG32((0x2c14 + j), 0x00000000);
4088 		WREG32((0x2c18 + j), 0x00000000);
4089 		WREG32((0x2c1c + j), 0x00000000);
4090 		WREG32((0x2c20 + j), 0x00000000);
4091 		WREG32((0x2c24 + j), 0x00000000);
4092 	}
4093 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4094 
4095 	evergreen_mc_stop(rdev, &save);
4096 	if (radeon_mc_wait_for_idle(rdev)) {
4097 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4098 	}
4099 	/* Lockout access through VGA aperture*/
4100 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4101 	/* Update configuration */
4102 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4103 	       rdev->mc.vram_start >> 12);
4104 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4105 	       rdev->mc.vram_end >> 12);
4106 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4107 	       rdev->vram_scratch.gpu_addr >> 12);
4108 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4109 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4110 	WREG32(MC_VM_FB_LOCATION, tmp);
4111 	/* XXX double check these! */
4112 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4113 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4114 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4115 	WREG32(MC_VM_AGP_BASE, 0);
4116 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4117 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4118 	if (radeon_mc_wait_for_idle(rdev)) {
4119 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4120 	}
4121 	evergreen_mc_resume(rdev, &save);
4122 	/* we need to own VRAM, so turn off the VGA renderer here
4123 	 * to stop it overwriting our objects */
4124 	rv515_vga_render_disable(rdev);
4125 }
4126 
4127 /**
4128  * cik_mc_init - initialize the memory controller driver params
4129  *
4130  * @rdev: radeon_device pointer
4131  *
4132  * Look up the amount of vram, vram width, and decide how to place
4133  * vram and gart within the GPU's physical address space (CIK).
4134  * Returns 0 for success.
4135  */
4136 static __unused int cik_mc_init(struct radeon_device *rdev)
4137 {
4138 	u32 tmp;
4139 	int chansize, numchan;
4140 
4141 	/* Get VRAM information */
4142 	rdev->mc.vram_is_ddr = true;
4143 	tmp = RREG32(MC_ARB_RAMCFG);
4144 	if (tmp & CHANSIZE_MASK) {
4145 		chansize = 64;
4146 	} else {
4147 		chansize = 32;
4148 	}
4149 	tmp = RREG32(MC_SHARED_CHMAP);
4150 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4151 	case 0:
4152 	default:
4153 		numchan = 1;
4154 		break;
4155 	case 1:
4156 		numchan = 2;
4157 		break;
4158 	case 2:
4159 		numchan = 4;
4160 		break;
4161 	case 3:
4162 		numchan = 8;
4163 		break;
4164 	case 4:
4165 		numchan = 3;
4166 		break;
4167 	case 5:
4168 		numchan = 6;
4169 		break;
4170 	case 6:
4171 		numchan = 10;
4172 		break;
4173 	case 7:
4174 		numchan = 12;
4175 		break;
4176 	case 8:
4177 		numchan = 16;
4178 		break;
4179 	}
4180 	rdev->mc.vram_width = numchan * chansize;
4181 	/* Could aper size report 0 ? */
4182 	rdev->mc.aper_base = drm_get_resource_start(rdev->ddev, 0);
4183 	rdev->mc.aper_size = drm_get_resource_len(rdev->ddev, 0);
4184 	/* CONFIG_MEMSIZE reports size in MB; convert to bytes */
4185 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4186 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4187 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4188 	si_vram_gtt_location(rdev, &rdev->mc);
4189 	radeon_update_bandwidth_info(rdev);
4190 
4191 	return 0;
4192 }
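
/*
 * Example of the vram_width calculation above: NOOFCHAN reporting
 * 4 channels (case 2) with a 64-bit channel size gives a memory bus
 * width of 4 * 64 = 256 bits.
 */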
4193 
4194 /*
4195  * GART
4196  * VMID 0 is the physical GPU addresses as used by the kernel.
4197  * VMIDs 1-15 are used for userspace clients and are handled
4198  * by the radeon vm/hsa code.
4199  */
4200 /**
4201  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4202  *
4203  * @rdev: radeon_device pointer
4204  *
4205  * Flush the TLB for the VMID 0 page table (CIK).
4206  */
4207 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4208 {
4209 	/* flush hdp cache */
4210 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4211 
4212 	/* bits 0-15 are the VM contexts0-15 */
4213 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
4214 }
4215 
4216 /**
4217  * cik_pcie_gart_enable - gart enable
4218  *
4219  * @rdev: radeon_device pointer
4220  *
4221  * This sets up the TLBs, programs the page tables for VMID0,
4222  * sets up the hw for VMIDs 1-15 which are allocated on
4223  * demand, and sets up the global locations for the LDS, GDS,
4224  * and GPUVM for FSA64 clients (CIK).
4225  * Returns 0 for success, errors for failure.
4226  */
4227 static __unused int cik_pcie_gart_enable(struct radeon_device *rdev)
4228 {
4229 	int r, i;
4230 
4231 	if (rdev->gart.robj == NULL) {
4232 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4233 		return -EINVAL;
4234 	}
4235 	r = radeon_gart_table_vram_pin(rdev);
4236 	if (r)
4237 		return r;
4238 	radeon_gart_restore(rdev);
4239 	/* Setup TLB control */
4240 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4241 	       (0xA << 7) |
4242 	       ENABLE_L1_TLB |
4243 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4244 	       ENABLE_ADVANCED_DRIVER_MODEL |
4245 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4246 	/* Setup L2 cache */
4247 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4248 	       ENABLE_L2_FRAGMENT_PROCESSING |
4249 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4250 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4251 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4252 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4253 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4254 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4255 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4256 	/* setup context0 */
4257 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4258 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4259 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4260 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4261 			(u32)(rdev->dummy_page.addr >> 12));
4262 	WREG32(VM_CONTEXT0_CNTL2, 0);
4263 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4264 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4265 
4266 	WREG32(0x15D4, 0);
4267 	WREG32(0x15D8, 0);
4268 	WREG32(0x15DC, 0);
4269 
4270 	/* empty context1-15 */
4271 	/* FIXME start with 4G, once using 2 level pt switch to full
4272 	 * vm size space
4273 	 */
4274 	/* set vm size, must be a multiple of 4 */
4275 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4276 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4277 	for (i = 1; i < 16; i++) {
4278 		if (i < 8)
4279 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4280 			       rdev->gart.table_addr >> 12);
4281 		else
4282 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4283 			       rdev->gart.table_addr >> 12);
4284 	}
4285 
4286 	/* enable context1-15 */
4287 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4288 	       (u32)(rdev->dummy_page.addr >> 12));
4289 	WREG32(VM_CONTEXT1_CNTL2, 4);
4290 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4291 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4292 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4293 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4294 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4295 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4296 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4297 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4298 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4299 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4300 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4301 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4302 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4303 
4304 	/* TC cache setup ??? */
4305 	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4306 	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4307 	WREG32(TC_CFG_L1_STORE_POLICY, 0);
4308 
4309 	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4310 	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4311 	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4312 	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4313 	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4314 
4315 	WREG32(TC_CFG_L1_VOLATILE, 0);
4316 	WREG32(TC_CFG_L2_VOLATILE, 0);
4317 
4318 	if (rdev->family == CHIP_KAVERI) {
4319 		u32 tmp = RREG32(CHUB_CONTROL);
4320 		tmp &= ~BYPASS_VM;
4321 		WREG32(CHUB_CONTROL, tmp);
4322 	}
4323 
4324 	/* XXX SH_MEM regs */
4325 	/* where to put LDS, scratch, GPUVM in FSA64 space */
4326 	spin_lock(&rdev->srbm_mutex);
4327 	for (i = 0; i < 16; i++) {
4328 		cik_srbm_select(rdev, 0, 0, 0, i);
4329 		/* CP and shaders */
4330 		WREG32(SH_MEM_CONFIG, 0);
4331 		WREG32(SH_MEM_APE1_BASE, 1);
4332 		WREG32(SH_MEM_APE1_LIMIT, 0);
4333 		WREG32(SH_MEM_BASES, 0);
4334 		/* SDMA GFX */
4335 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4336 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4337 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4338 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4339 		/* XXX SDMA RLC - todo */
4340 	}
4341 	cik_srbm_select(rdev, 0, 0, 0, 0);
4342 	spin_unlock(&rdev->srbm_mutex);
4343 
4344 	cik_pcie_gart_tlb_flush(rdev);
4345 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4346 		 (unsigned)(rdev->mc.gtt_size >> 20),
4347 		 (unsigned long long)rdev->gart.table_addr);
4348 	rdev->gart.ready = true;
4349 	return 0;
4350 }
4351 
4352 /**
4353  * cik_pcie_gart_disable - gart disable
4354  *
4355  * @rdev: radeon_device pointer
4356  *
4357  * This disables all VM page tables (CIK).
4358  */
4359 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4360 {
4361 	/* Disable all tables */
4362 	WREG32(VM_CONTEXT0_CNTL, 0);
4363 	WREG32(VM_CONTEXT1_CNTL, 0);
4364 	/* Setup TLB control */
4365 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4366 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4367 	/* Setup L2 cache */
4368 	WREG32(VM_L2_CNTL,
4369 	       ENABLE_L2_FRAGMENT_PROCESSING |
4370 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4371 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4372 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4373 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4374 	WREG32(VM_L2_CNTL2, 0);
4375 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4376 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4377 	radeon_gart_table_vram_unpin(rdev);
4378 }
4379 
4380 /**
4381  * cik_pcie_gart_fini - vm fini callback
4382  *
4383  * @rdev: radeon_device pointer
4384  *
4385  * Tears down the driver GART/VM setup (CIK).
4386  */
4387 static __unused void cik_pcie_gart_fini(struct radeon_device *rdev)
4388 {
4389 	cik_pcie_gart_disable(rdev);
4390 	radeon_gart_table_vram_free(rdev);
4391 	radeon_gart_fini(rdev);
4392 }
4393 
4394 /* vm parser */
4395 /**
4396  * cik_ib_parse - vm ib_parse callback
4397  *
4398  * @rdev: radeon_device pointer
4399  * @ib: indirect buffer pointer
4400  *
4401  * CIK uses hw IB checking so this is a nop (CIK).
4402  */
4403 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4404 {
4405 	return 0;
4406 }
4407 
4408 /*
4409  * vm
4410  * VMID 0 is the physical GPU addresses as used by the kernel.
4411  * VMIDs 1-15 are used for userspace clients and are handled
4412  * by the radeon vm/hsa code.
4413  */
4414 /**
4415  * cik_vm_init - cik vm init callback
4416  *
4417  * @rdev: radeon_device pointer
4418  *
4419  * Inits cik specific vm parameters (number of VMs, base of vram for
4420  * VMIDs 1-15) (CIK).
4421  * Returns 0 for success.
4422  */
4423 int cik_vm_init(struct radeon_device *rdev)
4424 {
4425 	/* number of VMs */
4426 	rdev->vm_manager.nvm = 16;
4427 	/* base offset of vram pages */
4428 	if (rdev->flags & RADEON_IS_IGP) {
4429 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
4430 		tmp <<= 22;
4431 		rdev->vm_manager.vram_base_offset = tmp;
4432 	} else
4433 		rdev->vm_manager.vram_base_offset = 0;
4434 
4435 	return 0;
4436 }
4437 
4438 /**
4439  * cik_vm_fini - cik vm fini callback
4440  *
4441  * @rdev: radeon_device pointer
4442  *
4443  * Tear down any asic specific VM setup (CIK).
4444  */
4445 void cik_vm_fini(struct radeon_device *rdev)
4446 {
4447 }
4448 
4449 /**
4450  * cik_vm_decode_fault - print human readable fault info
4451  *
4452  * @rdev: radeon_device pointer
4453  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4454  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
4455  *
4456  * Print human readable fault information (CIK).
4457  */
4458 static void cik_vm_decode_fault(struct radeon_device *rdev,
4459 				u32 status, u32 addr, u32 mc_client)
4460 {
4461 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4462 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4463 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4464 	char *block = (char *)&mc_client; /* mc_client is a 4-char ASCII block name */
4465 
4466 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4467 	       protections, vmid, addr,
4468 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4469 	       block, mc_id);
4470 }
4471 
4472 /**
4473  * cik_vm_flush - cik vm flush using the CP
4474  *
4475  * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
4476  *
4477  * Update the page table base and flush the VM TLB
4478  * using the CP (CIK).
4479  */
4480 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4481 {
4482 	struct radeon_ring *ring = &rdev->ring[ridx];
4483 
4484 	if (vm == NULL)
4485 		return;
4486 
4487 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4488 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4489 				 WRITE_DATA_DST_SEL(0)));
4490 	if (vm->id < 8) {
4491 		radeon_ring_write(ring,
4492 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4493 	} else {
4494 		radeon_ring_write(ring,
4495 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4496 	}
4497 	radeon_ring_write(ring, 0);
4498 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4499 
4500 	/* update SH_MEM_* regs */
4501 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4502 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4503 				 WRITE_DATA_DST_SEL(0)));
4504 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4505 	radeon_ring_write(ring, 0);
4506 	radeon_ring_write(ring, VMID(vm->id));
4507 
4508 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4509 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4510 				 WRITE_DATA_DST_SEL(0)));
4511 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
4512 	radeon_ring_write(ring, 0);
4513 
4514 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4515 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4516 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4517 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4518 
4519 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4520 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4521 				 WRITE_DATA_DST_SEL(0)));
4522 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4523 	radeon_ring_write(ring, 0);
4524 	radeon_ring_write(ring, VMID(0));
4525 
4526 	/* HDP flush */
4527 	/* We should be using the WAIT_REG_MEM packet here like in
4528 	 * cik_fence_ring_emit(), but it causes the CP to hang in this
4529 	 * context...
4530 	 */
4531 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4532 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4533 				 WRITE_DATA_DST_SEL(0)));
4534 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4535 	radeon_ring_write(ring, 0);
4536 	radeon_ring_write(ring, 0);
4537 
4538 	/* bits 0-15 are the VM contexts0-15 */
4539 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4540 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4541 				 WRITE_DATA_DST_SEL(0)));
4542 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4543 	radeon_ring_write(ring, 0);
4544 	radeon_ring_write(ring, 1 << vm->id);
4545 
4546 	/* compute doesn't have PFP */
4547 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4548 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4549 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4550 		radeon_ring_write(ring, 0x0);
4551 	}
4552 }
4553 
4554 /**
4555  * cik_vm_set_page - update the page tables using sDMA
4556  *
4557  * @rdev: radeon_device pointer
4558  * @ib: indirect buffer to fill with commands
4559  * @pe: addr of the page entry
4560  * @addr: dst addr to write into pe
4561  * @count: number of page entries to update
4562  * @incr: increase next addr by incr bytes
4563  * @flags: access flags
4564  *
4565  * Update the page tables using CP or sDMA (CIK).
4566  */
4567 void cik_vm_set_page(struct radeon_device *rdev,
4568 		     struct radeon_ib *ib,
4569 		     uint64_t pe,
4570 		     uint64_t addr, unsigned count,
4571 		     uint32_t incr, uint32_t flags)
4572 {
4573 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4574 	uint64_t value;
4575 	unsigned ndw;
4576 
4577 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4578 		/* CP */
4579 		while (count) {
4580 			ndw = 2 + count * 2;
4581 			if (ndw > 0x3FFE)
4582 				ndw = 0x3FFE;
4583 
4584 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4585 			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4586 						    WRITE_DATA_DST_SEL(1));
4587 			ib->ptr[ib->length_dw++] = pe;
4588 			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4589 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4590 				if (flags & RADEON_VM_PAGE_SYSTEM) {
4591 					value = radeon_vm_map_gart(rdev, addr);
4592 					value &= 0xFFFFFFFFFFFFF000ULL;
4593 				} else if (flags & RADEON_VM_PAGE_VALID) {
4594 					value = addr;
4595 				} else {
4596 					value = 0;
4597 				}
4598 				addr += incr;
4599 				value |= r600_flags;
4600 				ib->ptr[ib->length_dw++] = value;
4601 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4602 			}
4603 		}
4604 	} else {
4605 		/* DMA */
4606 		if (flags & RADEON_VM_PAGE_SYSTEM) {
4607 			while (count) {
4608 				ndw = count * 2;
4609 				if (ndw > 0xFFFFE)
4610 					ndw = 0xFFFFE;
4611 
4612 				/* for non-physically contiguous pages (system) */
4613 				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4614 				ib->ptr[ib->length_dw++] = pe;
4615 				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4616 				ib->ptr[ib->length_dw++] = ndw;
4617 				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4618 					if (flags & RADEON_VM_PAGE_SYSTEM) {
4619 						value = radeon_vm_map_gart(rdev, addr);
4620 						value &= 0xFFFFFFFFFFFFF000ULL;
4621 					} else if (flags & RADEON_VM_PAGE_VALID) {
4622 						value = addr;
4623 					} else {
4624 						value = 0;
4625 					}
4626 					addr += incr;
4627 					value |= r600_flags;
4628 					ib->ptr[ib->length_dw++] = value;
4629 					ib->ptr[ib->length_dw++] = upper_32_bits(value);
4630 				}
4631 			}
4632 		} else {
4633 			while (count) {
4634 				ndw = count;
4635 				if (ndw > 0x7FFFF)
4636 					ndw = 0x7FFFF;
4637 
4638 				if (flags & RADEON_VM_PAGE_VALID)
4639 					value = addr;
4640 				else
4641 					value = 0;
4642 				/* for physically contiguous pages (vram) */
4643 				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4644 				ib->ptr[ib->length_dw++] = pe; /* dst addr */
4645 				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4646 				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4647 				ib->ptr[ib->length_dw++] = 0;
4648 				ib->ptr[ib->length_dw++] = value; /* value */
4649 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4650 				ib->ptr[ib->length_dw++] = incr; /* increment size */
4651 				ib->ptr[ib->length_dw++] = 0;
4652 				ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4653 				pe += ndw * 8;
4654 				addr += ndw * incr;
4655 				count -= ndw;
4656 			}
4657 		}
4658 		while (ib->length_dw & 0x7)
4659 			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4660 	}
4661 }
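
/*
 * Sizing example for the CP path of cik_vm_set_page() above: updating
 * count = 512 entries uses a single WRITE_DATA packet with
 * ndw = 2 + 512 * 2 = 1026 dwords (well under the 0x3FFE cap), i.e. the
 * page table address pair followed by 512 64-bit entries.
 */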
4662 
4663 /**
4664  * cik_dma_vm_flush - cik vm flush using sDMA
4665  *
4666  * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
4667  *
4668  * Update the page table base and flush the VM TLB
4669  * using sDMA (CIK).
4670  */
4671 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4672 {
4673 	struct radeon_ring *ring = &rdev->ring[ridx];
4674 	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4675 			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4676 	u32 ref_and_mask;
4677 
4678 	if (vm == NULL)
4679 		return;
4680 
4681 	if (ridx == R600_RING_TYPE_DMA_INDEX)
4682 		ref_and_mask = SDMA0;
4683 	else
4684 		ref_and_mask = SDMA1;
4685 
4686 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4687 	if (vm->id < 8) {
4688 		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4689 	} else {
4690 		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4691 	}
4692 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4693 
4694 	/* update SH_MEM_* regs */
4695 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4696 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4697 	radeon_ring_write(ring, VMID(vm->id));
4698 
4699 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4700 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
4701 	radeon_ring_write(ring, 0);
4702 
4703 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4704 	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4705 	radeon_ring_write(ring, 0);
4706 
4707 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4708 	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4709 	radeon_ring_write(ring, 1);
4710 
4711 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4712 	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4713 	radeon_ring_write(ring, 0);
4714 
4715 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4716 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4717 	radeon_ring_write(ring, VMID(0));
4718 
4719 	/* flush HDP */
4720 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4721 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4722 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4723 	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4724 	radeon_ring_write(ring, ref_and_mask); /* MASK */
4725 	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4726 
4727 	/* flush TLB */
4728 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4729 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4730 	radeon_ring_write(ring, 1 << vm->id);
4731 }
4732 
4733 /*
4734  * RLC
4735  * The RLC is a multi-purpose microengine that handles a
4736  * variety of functions, the most important of which is
4737  * the interrupt controller.
4738  */
4739 /**
4740  * cik_rlc_stop - stop the RLC ME
4741  *
4742  * @rdev: radeon_device pointer
4743  *
4744  * Halt the RLC ME (MicroEngine) (CIK).
4745  */
4746 static void cik_rlc_stop(struct radeon_device *rdev)
4747 {
4748 	int i, j, k;
4749 	u32 mask, tmp;
4750 
4751 	tmp = RREG32(CP_INT_CNTL_RING0);
4752 	tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4753 	WREG32(CP_INT_CNTL_RING0, tmp);
4754 
4755 	RREG32(CB_CGTT_SCLK_CTRL);
4756 	RREG32(CB_CGTT_SCLK_CTRL);
4757 	RREG32(CB_CGTT_SCLK_CTRL);
4758 	RREG32(CB_CGTT_SCLK_CTRL);
4759 
4760 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4761 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4762 
4763 	WREG32(RLC_CNTL, 0);
4764 
4765 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4766 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4767 			cik_select_se_sh(rdev, i, j);
4768 			for (k = 0; k < rdev->usec_timeout; k++) {
4769 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4770 					break;
4771 				udelay(1);
4772 			}
4773 		}
4774 	}
4775 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4776 
4777 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4778 	for (k = 0; k < rdev->usec_timeout; k++) {
4779 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4780 			break;
4781 		udelay(1);
4782 	}
4783 }
4784 
4785 /**
4786  * cik_rlc_start - start the RLC ME
4787  *
4788  * @rdev: radeon_device pointer
4789  *
4790  * Unhalt the RLC ME (MicroEngine) (CIK).
4791  */
4792 static void cik_rlc_start(struct radeon_device *rdev)
4793 {
4794 	u32 tmp;
4795 
4796 	WREG32(RLC_CNTL, RLC_ENABLE);
4797 
4798 	tmp = RREG32(CP_INT_CNTL_RING0);
4799 	tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4800 	WREG32(CP_INT_CNTL_RING0, tmp);
4801 
4802 	udelay(50);
4803 }
4804 
4805 /**
4806  * cik_rlc_resume - setup the RLC hw
4807  *
4808  * @rdev: radeon_device pointer
4809  *
4810  * Initialize the RLC registers, load the ucode,
4811  * and start the RLC (CIK).
4812  * Returns 0 for success, -EINVAL if the ucode is not available.
4813  */
4814 static __unused int cik_rlc_resume(struct radeon_device *rdev)
4815 {
4816 	u32 i, size;
4817 	u32 clear_state_info[3];
4818 	const __be32 *fw_data;
4819 
4820 	if (!rdev->rlc_fw)
4821 		return -EINVAL;
4822 
4823 	switch (rdev->family) {
4824 	case CHIP_BONAIRE:
4825 	default:
4826 		size = BONAIRE_RLC_UCODE_SIZE;
4827 		break;
4828 	case CHIP_KAVERI:
4829 		size = KV_RLC_UCODE_SIZE;
4830 		break;
4831 	case CHIP_KABINI:
4832 		size = KB_RLC_UCODE_SIZE;
4833 		break;
4834 	}
4835 
4836 	cik_rlc_stop(rdev);
4837 
4838 	WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4839 	RREG32(GRBM_SOFT_RESET);
4840 	udelay(50);
4841 	WREG32(GRBM_SOFT_RESET, 0);
4842 	RREG32(GRBM_SOFT_RESET);
4843 	udelay(50);
4844 
4845 	WREG32(RLC_LB_CNTR_INIT, 0);
4846 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4847 
4848 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4849 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4850 	WREG32(RLC_LB_PARAMS, 0x00600408);
4851 	WREG32(RLC_LB_CNTL, 0x80000004);
4852 
4853 	WREG32(RLC_MC_CNTL, 0);
4854 	WREG32(RLC_UCODE_CNTL, 0);
4855 
4856 	fw_data = (const __be32 *)rdev->rlc_fw->data;
4857 	WREG32(RLC_GPM_UCODE_ADDR, 0);
4858 	for (i = 0; i < size; i++)
4859 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4860 	WREG32(RLC_GPM_UCODE_ADDR, 0);
4861 
4862 	/* XXX */
4863 	clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4864 	clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4865 	clear_state_info[2] = 0;//cik_default_size;
4866 	WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4867 	for (i = 0; i < 3; i++)
4868 		WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4869 	WREG32(RLC_DRIVER_DMA_STATUS, 0);
4870 
4871 	cik_rlc_start(rdev);
4872 
4873 	return 0;
4874 }
4875 
4876 /*
4877  * Interrupts
4878  * Starting with r6xx, interrupts are handled via a ring buffer.
4879  * Ring buffers are areas of GPU accessible memory that the GPU
4880  * writes interrupt vectors into and the host reads vectors out of.
4881  * There is a rptr (read pointer) that determines where the
4882  * host is currently reading, and a wptr (write pointer)
4883  * which determines where the GPU has written.  When the
4884  * pointers are equal, the ring is idle.  When the GPU
4885  * writes vectors to the ring buffer, it increments the
4886  * wptr.  When there is an interrupt, the host then starts
4887  * fetching vectors and processing them until the pointers are
4888  * equal again, at which point it updates the rptr.
4889  */
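
/*
 * Host-side processing, sketched (not the handler in this file; the
 * helpers named here are hypothetical and each IH vector is assumed
 * to be 16 bytes):
 *
 *   wptr = get_ih_wptr(rdev);
 *   while (rptr != wptr) {
 *           handle_ih_vector(rdev, rptr);
 *           rptr = (rptr + 16) & rdev->ih.ptr_mask;
 *   }
 *   WREG32(IH_RB_RPTR, rptr);
 */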
4890 
4891 /**
4892  * cik_enable_interrupts - Enable the interrupt ring buffer
4893  *
4894  * @rdev: radeon_device pointer
4895  *
4896  * Enable the interrupt ring buffer (CIK).
4897  */
4898 static void cik_enable_interrupts(struct radeon_device *rdev)
4899 {
4900 	u32 ih_cntl = RREG32(IH_CNTL);
4901 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4902 
4903 	ih_cntl |= ENABLE_INTR;
4904 	ih_rb_cntl |= IH_RB_ENABLE;
4905 	WREG32(IH_CNTL, ih_cntl);
4906 	WREG32(IH_RB_CNTL, ih_rb_cntl);
4907 	rdev->ih.enabled = true;
4908 }
4909 
4910 /**
4911  * cik_disable_interrupts - Disable the interrupt ring buffer
4912  *
4913  * @rdev: radeon_device pointer
4914  *
4915  * Disable the interrupt ring buffer (CIK).
4916  */
4917 static void cik_disable_interrupts(struct radeon_device *rdev)
4918 {
4919 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4920 	u32 ih_cntl = RREG32(IH_CNTL);
4921 
4922 	ih_rb_cntl &= ~IH_RB_ENABLE;
4923 	ih_cntl &= ~ENABLE_INTR;
4924 	WREG32(IH_RB_CNTL, ih_rb_cntl);
4925 	WREG32(IH_CNTL, ih_cntl);
4926 	/* set rptr, wptr to 0 */
4927 	WREG32(IH_RB_RPTR, 0);
4928 	WREG32(IH_RB_WPTR, 0);
4929 	rdev->ih.enabled = false;
4930 	rdev->ih.rptr = 0;
4931 }
4932 
4933 /**
4934  * cik_disable_interrupt_state - Disable all interrupt sources
4935  *
4936  * @rdev: radeon_device pointer
4937  *
4938  * Clear all interrupt enable bits used by the driver (CIK).
4939  */
4940 static void cik_disable_interrupt_state(struct radeon_device *rdev)
4941 {
4942 	u32 tmp;
4943 
4944 	/* gfx ring */
4945 	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4946 	/* sdma */
4947 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4948 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4949 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4950 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4951 	/* compute queues */
4952 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4953 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4954 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4955 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4956 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4957 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4958 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4959 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4960 	/* grbm */
4961 	WREG32(GRBM_INT_CNTL, 0);
4962 	/* vline/vblank, etc. */
4963 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4964 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4965 	if (rdev->num_crtc >= 4) {
4966 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4967 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4968 	}
4969 	if (rdev->num_crtc >= 6) {
4970 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4971 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4972 	}
4973 
4974 	/* dac hotplug */
4975 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4976 
4977 	/* digital hotplug */
4978 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4979 	WREG32(DC_HPD1_INT_CONTROL, tmp);
4980 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4981 	WREG32(DC_HPD2_INT_CONTROL, tmp);
4982 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4983 	WREG32(DC_HPD3_INT_CONTROL, tmp);
4984 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4985 	WREG32(DC_HPD4_INT_CONTROL, tmp);
4986 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4987 	WREG32(DC_HPD5_INT_CONTROL, tmp);
4988 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4989 	WREG32(DC_HPD6_INT_CONTROL, tmp);
4990 
4991 }
4992 
4993 /**
4994  * cik_irq_init - init and enable the interrupt ring
4995  *
4996  * @rdev: radeon_device pointer
4997  *
4998  * Allocate a ring buffer for the interrupt controller,
4999  * enable the RLC, disable interrupts, set up the IH
5000  * ring buffer and enable it (CIK).
5001  * Called at device load and resume.
5002  * Returns 0 for success, errors for failure.
5003  */
5004 static __unused int cik_irq_init(struct radeon_device *rdev)
5005 {
5006 	int ret = 0;
5007 	int rb_bufsz;
5008 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5009 
5010 	/* allocate ring */
5011 	ret = r600_ih_ring_alloc(rdev);
5012 	if (ret)
5013 		return ret;
5014 
5015 	/* disable irqs */
5016 	cik_disable_interrupts(rdev);
5017 
5018 	/* init rlc */
5019 	ret = cik_rlc_resume(rdev);
5020 	if (ret) {
5021 		r600_ih_ring_fini(rdev);
5022 		return ret;
5023 	}
5024 
5025 	/* setup interrupt control */
5026 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
5027 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5028 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
5029 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5030 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5031 	 */
5032 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5033 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5034 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5035 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
5036 
5037 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5038 	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5039 
5040 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5041 		      IH_WPTR_OVERFLOW_CLEAR |
5042 		      (rb_bufsz << 1));
5043 
5044 	if (rdev->wb.enabled)
5045 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5046 
5047 	/* set the writeback address whether it's enabled or not */
5048 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5049 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5050 
5051 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5052 
5053 	/* set rptr, wptr to 0 */
5054 	WREG32(IH_RB_RPTR, 0);
5055 	WREG32(IH_RB_WPTR, 0);
5056 
5057 	/* Default settings for IH_CNTL (disabled at first) */
5058 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5059 	/* RPTR_REARM only works if msi's are enabled */
5060 	if (rdev->msi_enabled)
5061 		ih_cntl |= RPTR_REARM;
5062 	WREG32(IH_CNTL, ih_cntl);
5063 
5064 	/* force the active interrupt state to all disabled */
5065 	cik_disable_interrupt_state(rdev);
5066 
5067 	pci_enable_busmaster(rdev->dev);
5068 
5069 	/* enable irqs */
5070 	cik_enable_interrupts(rdev);
5071 
5072 	return ret;
5073 }
5074 
5075 /**
5076  * cik_irq_set - enable/disable interrupt sources
5077  *
5078  * @rdev: radeon_device pointer
5079  *
5080  * Enable interrupt sources on the GPU (vblanks, hpd,
5081  * etc.) (CIK).
5082  * Returns 0 for success, errors for failure.
5083  */
5084 int cik_irq_set(struct radeon_device *rdev)
5085 {
5086 	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5087 		PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
5088 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5089 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
5090 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5091 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5092 	u32 grbm_int_cntl = 0;
5093 	u32 dma_cntl, dma_cntl1;
5094 
5095 	if (!rdev->irq.installed) {
5096 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5097 		return -EINVAL;
5098 	}
5099 	/* don't enable anything if the ih is disabled */
5100 	if (!rdev->ih.enabled) {
5101 		cik_disable_interrupts(rdev);
5102 		/* force the active interrupt state to all disabled */
5103 		cik_disable_interrupt_state(rdev);
5104 		return 0;
5105 	}
5106 
5107 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5108 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5109 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5110 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5111 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5112 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5113 
5114 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5115 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5116 
5117 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5118 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5119 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5120 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5121 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5122 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5123 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5124 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5125 
5126 	/* enable CP interrupts on all rings */
5127 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5128 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
5129 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5130 	}
5131 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5132 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5133 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
5134 		if (ring->me == 1) {
5135 			switch (ring->pipe) {
5136 			case 0:
5137 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5138 				break;
5139 			case 1:
5140 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5141 				break;
5142 			case 2:
5143 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5144 				break;
5145 			case 3:
5146 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5147 				break;
5148 			default:
5149 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5150 				break;
5151 			}
5152 		} else if (ring->me == 2) {
5153 			switch (ring->pipe) {
5154 			case 0:
5155 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5156 				break;
5157 			case 1:
5158 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5159 				break;
5160 			case 2:
5161 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5162 				break;
5163 			case 3:
5164 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5165 				break;
5166 			default:
5167 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5168 				break;
5169 			}
5170 		} else {
5171 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
5172 		}
5173 	}
5174 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5175 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5176 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
5177 		if (ring->me == 1) {
5178 			switch (ring->pipe) {
5179 			case 0:
5180 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5181 				break;
5182 			case 1:
5183 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5184 				break;
5185 			case 2:
5186 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5187 				break;
5188 			case 3:
5189 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5190 				break;
5191 			default:
5192 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5193 				break;
5194 			}
5195 		} else if (ring->me == 2) {
5196 			switch (ring->pipe) {
5197 			case 0:
5198 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5199 				break;
5200 			case 1:
5201 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5202 				break;
5203 			case 2:
5204 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5205 				break;
5206 			case 3:
5207 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5208 				break;
5209 			default:
5210 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5211 				break;
5212 			}
5213 		} else {
5214 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
5215 		}
5216 	}
5217 
5218 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5219 		DRM_DEBUG("cik_irq_set: sw int dma\n");
5220 		dma_cntl |= TRAP_ENABLE;
5221 	}
5222 
5223 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5224 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
5225 		dma_cntl1 |= TRAP_ENABLE;
5226 	}
5227 
5228 	if (rdev->irq.crtc_vblank_int[0] ||
5229 	    atomic_read(&rdev->irq.pflip[0])) {
5230 		DRM_DEBUG("cik_irq_set: vblank 0\n");
5231 		crtc1 |= VBLANK_INTERRUPT_MASK;
5232 	}
5233 	if (rdev->irq.crtc_vblank_int[1] ||
5234 	    atomic_read(&rdev->irq.pflip[1])) {
5235 		DRM_DEBUG("cik_irq_set: vblank 1\n");
5236 		crtc2 |= VBLANK_INTERRUPT_MASK;
5237 	}
5238 	if (rdev->irq.crtc_vblank_int[2] ||
5239 	    atomic_read(&rdev->irq.pflip[2])) {
5240 		DRM_DEBUG("cik_irq_set: vblank 2\n");
5241 		crtc3 |= VBLANK_INTERRUPT_MASK;
5242 	}
5243 	if (rdev->irq.crtc_vblank_int[3] ||
5244 	    atomic_read(&rdev->irq.pflip[3])) {
5245 		DRM_DEBUG("cik_irq_set: vblank 3\n");
5246 		crtc4 |= VBLANK_INTERRUPT_MASK;
5247 	}
5248 	if (rdev->irq.crtc_vblank_int[4] ||
5249 	    atomic_read(&rdev->irq.pflip[4])) {
5250 		DRM_DEBUG("cik_irq_set: vblank 4\n");
5251 		crtc5 |= VBLANK_INTERRUPT_MASK;
5252 	}
5253 	if (rdev->irq.crtc_vblank_int[5] ||
5254 	    atomic_read(&rdev->irq.pflip[5])) {
5255 		DRM_DEBUG("cik_irq_set: vblank 5\n");
5256 		crtc6 |= VBLANK_INTERRUPT_MASK;
5257 	}
5258 	if (rdev->irq.hpd[0]) {
5259 		DRM_DEBUG("cik_irq_set: hpd 1\n");
5260 		hpd1 |= DC_HPDx_INT_EN;
5261 	}
5262 	if (rdev->irq.hpd[1]) {
5263 		DRM_DEBUG("cik_irq_set: hpd 2\n");
5264 		hpd2 |= DC_HPDx_INT_EN;
5265 	}
5266 	if (rdev->irq.hpd[2]) {
5267 		DRM_DEBUG("cik_irq_set: hpd 3\n");
5268 		hpd3 |= DC_HPDx_INT_EN;
5269 	}
5270 	if (rdev->irq.hpd[3]) {
5271 		DRM_DEBUG("cik_irq_set: hpd 4\n");
5272 		hpd4 |= DC_HPDx_INT_EN;
5273 	}
5274 	if (rdev->irq.hpd[4]) {
5275 		DRM_DEBUG("cik_irq_set: hpd 5\n");
5276 		hpd5 |= DC_HPDx_INT_EN;
5277 	}
5278 	if (rdev->irq.hpd[5]) {
5279 		DRM_DEBUG("cik_irq_set: hpd 6\n");
5280 		hpd6 |= DC_HPDx_INT_EN;
5281 	}
5282 
5283 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5284 
5285 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5286 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5287 
5288 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5289 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5290 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5291 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5292 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5293 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5294 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5295 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5296 
5297 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5298 
5299 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5300 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5301 	if (rdev->num_crtc >= 4) {
5302 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5303 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5304 	}
5305 	if (rdev->num_crtc >= 6) {
5306 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5307 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5308 	}
5309 
5310 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
5311 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
5312 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
5313 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
5314 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
5315 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
5316 
5317 	return 0;
5318 }
5319 
5320 /**
5321  * cik_irq_ack - ack interrupt sources
5322  *
5323  * @rdev: radeon_device pointer
5324  *
5325  * Ack interrupt sources on the GPU (vblanks, hpd,
5326  * etc.) (CIK).  Certain interrupt sources are sw
5327  * generated and do not require an explicit ack.
5328  */
5329 static inline void cik_irq_ack(struct radeon_device *rdev)
5330 {
5331 	u32 tmp;
5332 
5333 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5334 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5335 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5336 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5337 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5338 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5339 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5340 
5341 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5342 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5343 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5344 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5345 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5346 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5347 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5348 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5349 
5350 	if (rdev->num_crtc >= 4) {
5351 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5352 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5353 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5354 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5355 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5356 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5357 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5358 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5359 	}
5360 
5361 	if (rdev->num_crtc >= 6) {
5362 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5363 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5364 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5365 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5366 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5367 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5368 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5369 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5370 	}
5371 
5372 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5373 		tmp = RREG32(DC_HPD1_INT_CONTROL);
5374 		tmp |= DC_HPDx_INT_ACK;
5375 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5376 	}
5377 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5378 		tmp = RREG32(DC_HPD2_INT_CONTROL);
5379 		tmp |= DC_HPDx_INT_ACK;
5380 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5381 	}
5382 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5383 		tmp = RREG32(DC_HPD3_INT_CONTROL);
5384 		tmp |= DC_HPDx_INT_ACK;
5385 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5386 	}
5387 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5388 		tmp = RREG32(DC_HPD4_INT_CONTROL);
5389 		tmp |= DC_HPDx_INT_ACK;
5390 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5391 	}
5392 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5393 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5394 		tmp |= DC_HPDx_INT_ACK;
5395 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5396 	}
5397 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5398 		tmp = RREG32(DC_HPD6_INT_CONTROL);
5399 		tmp |= DC_HPDx_INT_ACK;
5400 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5401 	}
5402 }
5403 
5404 /**
5405  * cik_irq_disable - disable interrupts
5406  *
5407  * @rdev: radeon_device pointer
5408  *
5409  * Disable interrupts on the hw (CIK).
5410  */
5411 static void cik_irq_disable(struct radeon_device *rdev)
5412 {
5413 	cik_disable_interrupts(rdev);
5414 	/* Wait and acknowledge irq */
5415 	mdelay(1);
5416 	cik_irq_ack(rdev);
5417 	cik_disable_interrupt_state(rdev);
5418 }
5419 
5420 /**
5421  * cik_irq_suspend - disable interrupts for suspend
5422  *
5423  * @rdev: radeon_device pointer
5424  *
5425  * Disable interrupts and stop the RLC (CIK).
5426  * Used for suspend.
5427  */
5428 static void cik_irq_suspend(struct radeon_device *rdev)
5429 {
5430 	cik_irq_disable(rdev);
5431 	cik_rlc_stop(rdev);
5432 }
5433 
5434 /**
5435  * cik_irq_fini - tear down interrupt support
5436  *
5437  * @rdev: radeon_device pointer
5438  *
5439  * Disable interrupts on the hw and free the IH ring
5440  * buffer (CIK).
5441  * Used for driver unload.
5442  */
5443 static __unused void cik_irq_fini(struct radeon_device *rdev)
5444 {
5445 	cik_irq_suspend(rdev);
5446 	r600_ih_ring_fini(rdev);
5447 }
5448 
5449 /**
5450  * cik_get_ih_wptr - get the IH ring buffer wptr
5451  *
5452  * @rdev: radeon_device pointer
5453  *
5454  * Get the IH ring buffer wptr from either the register
5455  * or the writeback memory buffer (CIK).  Also check for
5456  * ring buffer overflow and deal with it.
5457  * Used by cik_irq_process().
5458  * Returns the value of the wptr.
5459  */
5460 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5461 {
5462 	u32 wptr, tmp;
5463 
5464 	if (rdev->wb.enabled)
5465 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5466 	else
5467 		wptr = RREG32(IH_RB_WPTR);
5468 
5469 	if (wptr & RB_OVERFLOW) {
5470 		/* When a ring buffer overflow happens, start parsing interrupts
5471 		 * from the last vector that was not overwritten (wptr + 16).
5472 		 * Hopefully this should allow us to catch up.
5473 		 */
5474 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5475 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5476 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5477 		tmp = RREG32(IH_RB_CNTL);
5478 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
5479 		WREG32(IH_RB_CNTL, tmp);
5480 	}
5481 	return (wptr & rdev->ih.ptr_mask);
5482 }
5483 
5484 /*        CIK IV Ring
5485  * Each IV ring entry is 128 bits:
5486  * [7:0]    - interrupt source id
5487  * [31:8]   - reserved
5488  * [59:32]  - interrupt source data
5489  * [63:60]  - reserved
5490  * [71:64]  - RINGID
5491  *            CP:
5492  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
5493  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5494  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5495  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5496  *            PIPE_ID - ME0 0=3D
5497  *                    - ME1&2 compute dispatcher (4 pipes each)
5498  *            SDMA:
5499  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
5500  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
5501  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
5502  * [79:72]  - VMID
5503  * [95:80]  - PASID
5504  * [127:96] - reserved
5505  */
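/*
 * Concretely, the first three dwords of each entry map onto the decode in
 * cik_irq_process() below: src_id = dw0 & 0xff, src_data = dw1 & 0x0fffffff,
 * ring_id = dw2 & 0xff; the fourth dword is reserved and is not read here.
 */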
5506 /**
5507  * cik_irq_process - interrupt handler
5508  *
5509  * @rdev: radeon_device pointer
5510  *
5511  * Interrupt handler (CIK).  Walk the IH ring,
5512  * ack interrupts and schedule work to handle
5513  * interrupt events.
5514  * Returns irq process return code.
5515  */
5516 irqreturn_t cik_irq_process(struct radeon_device *rdev)
5517 {
5518 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5519 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5520 	u32 wptr;
5521 	u32 rptr;
5522 	u32 src_id, src_data, ring_id;
5523 	u8 me_id, pipe_id, queue_id;
5524 	u32 ring_index;
5525 	bool queue_hotplug = false;
5526 	bool queue_reset = false;
5527 	u32 addr, status, mc_client;
5528 
5529 	if (!rdev->ih.enabled || rdev->shutdown)
5530 		return IRQ_NONE;
5531 
5532 	wptr = cik_get_ih_wptr(rdev);
5533 
5534 restart_ih:
5535 	/* is somebody else already processing irqs? */
5536 	if (atomic_xchg(&rdev->ih.lock, 1))
5537 		return IRQ_NONE;
5538 
5539 	rptr = rdev->ih.rptr;
5540 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5541 
5542 	/* Order reading of wptr vs. reading of IH ring data */
5543 	rmb();
5544 
5545 	/* display interrupts */
5546 	cik_irq_ack(rdev);
5547 
5548 	while (rptr != wptr) {
5549 		/* wptr/rptr are in bytes! */
5550 		ring_index = rptr / 4;
5551 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5552 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5553 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
5554 
5555 		switch (src_id) {
5556 		case 1: /* D1 vblank/vline */
5557 			switch (src_data) {
5558 			case 0: /* D1 vblank */
5559 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5560 					if (rdev->irq.crtc_vblank_int[0]) {
5561 						drm_handle_vblank(rdev->ddev, 0);
5562 						rdev->pm.vblank_sync = true;
5563 						wake_up(&rdev->irq.vblank_queue);
5564 					}
5565 					if (atomic_read(&rdev->irq.pflip[0]))
5566 						radeon_crtc_handle_flip(rdev, 0);
5567 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5568 					DRM_DEBUG("IH: D1 vblank\n");
5569 				}
5570 				break;
5571 			case 1: /* D1 vline */
5572 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5573 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5574 					DRM_DEBUG("IH: D1 vline\n");
5575 				}
5576 				break;
5577 			default:
5578 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5579 				break;
5580 			}
5581 			break;
5582 		case 2: /* D2 vblank/vline */
5583 			switch (src_data) {
5584 			case 0: /* D2 vblank */
5585 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5586 					if (rdev->irq.crtc_vblank_int[1]) {
5587 						drm_handle_vblank(rdev->ddev, 1);
5588 						rdev->pm.vblank_sync = true;
5589 						wake_up(&rdev->irq.vblank_queue);
5590 					}
5591 					if (atomic_read(&rdev->irq.pflip[1]))
5592 						radeon_crtc_handle_flip(rdev, 1);
5593 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5594 					DRM_DEBUG("IH: D2 vblank\n");
5595 				}
5596 				break;
5597 			case 1: /* D2 vline */
5598 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5599 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5600 					DRM_DEBUG("IH: D2 vline\n");
5601 				}
5602 				break;
5603 			default:
5604 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5605 				break;
5606 			}
5607 			break;
5608 		case 3: /* D3 vblank/vline */
5609 			switch (src_data) {
5610 			case 0: /* D3 vblank */
5611 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5612 					if (rdev->irq.crtc_vblank_int[2]) {
5613 						drm_handle_vblank(rdev->ddev, 2);
5614 						rdev->pm.vblank_sync = true;
5615 						wake_up(&rdev->irq.vblank_queue);
5616 					}
5617 					if (atomic_read(&rdev->irq.pflip[2]))
5618 						radeon_crtc_handle_flip(rdev, 2);
5619 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5620 					DRM_DEBUG("IH: D3 vblank\n");
5621 				}
5622 				break;
5623 			case 1: /* D3 vline */
5624 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5625 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5626 					DRM_DEBUG("IH: D3 vline\n");
5627 				}
5628 				break;
5629 			default:
5630 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5631 				break;
5632 			}
5633 			break;
5634 		case 4: /* D4 vblank/vline */
5635 			switch (src_data) {
5636 			case 0: /* D4 vblank */
5637 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5638 					if (rdev->irq.crtc_vblank_int[3]) {
5639 						drm_handle_vblank(rdev->ddev, 3);
5640 						rdev->pm.vblank_sync = true;
5641 						wake_up(&rdev->irq.vblank_queue);
5642 					}
5643 					if (atomic_read(&rdev->irq.pflip[3]))
5644 						radeon_crtc_handle_flip(rdev, 3);
5645 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5646 					DRM_DEBUG("IH: D4 vblank\n");
5647 				}
5648 				break;
5649 			case 1: /* D4 vline */
5650 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5651 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5652 					DRM_DEBUG("IH: D4 vline\n");
5653 				}
5654 				break;
5655 			default:
5656 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5657 				break;
5658 			}
5659 			break;
5660 		case 5: /* D5 vblank/vline */
5661 			switch (src_data) {
5662 			case 0: /* D5 vblank */
5663 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5664 					if (rdev->irq.crtc_vblank_int[4]) {
5665 						drm_handle_vblank(rdev->ddev, 4);
5666 						rdev->pm.vblank_sync = true;
5667 						wake_up(&rdev->irq.vblank_queue);
5668 					}
5669 					if (atomic_read(&rdev->irq.pflip[4]))
5670 						radeon_crtc_handle_flip(rdev, 4);
5671 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5672 					DRM_DEBUG("IH: D5 vblank\n");
5673 				}
5674 				break;
5675 			case 1: /* D5 vline */
5676 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5677 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5678 					DRM_DEBUG("IH: D5 vline\n");
5679 				}
5680 				break;
5681 			default:
5682 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5683 				break;
5684 			}
5685 			break;
5686 		case 6: /* D6 vblank/vline */
5687 			switch (src_data) {
5688 			case 0: /* D6 vblank */
5689 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5690 					if (rdev->irq.crtc_vblank_int[5]) {
5691 						drm_handle_vblank(rdev->ddev, 5);
5692 						rdev->pm.vblank_sync = true;
5693 						wake_up(&rdev->irq.vblank_queue);
5694 					}
5695 					if (atomic_read(&rdev->irq.pflip[5]))
5696 						radeon_crtc_handle_flip(rdev, 5);
5697 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5698 					DRM_DEBUG("IH: D6 vblank\n");
5699 				}
5700 				break;
5701 			case 1: /* D6 vline */
5702 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5703 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5704 					DRM_DEBUG("IH: D6 vline\n");
5705 				}
5706 				break;
5707 			default:
5708 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5709 				break;
5710 			}
5711 			break;
5712 		case 42: /* HPD hotplug */
5713 			switch (src_data) {
5714 			case 0:
5715 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5716 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5717 					queue_hotplug = true;
5718 					DRM_DEBUG("IH: HPD1\n");
5719 				}
5720 				break;
5721 			case 1:
5722 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5723 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5724 					queue_hotplug = true;
5725 					DRM_DEBUG("IH: HPD2\n");
5726 				}
5727 				break;
5728 			case 2:
5729 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5730 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5731 					queue_hotplug = true;
5732 					DRM_DEBUG("IH: HPD3\n");
5733 				}
5734 				break;
5735 			case 3:
5736 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5737 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5738 					queue_hotplug = true;
5739 					DRM_DEBUG("IH: HPD4\n");
5740 				}
5741 				break;
5742 			case 4:
5743 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5744 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5745 					queue_hotplug = true;
5746 					DRM_DEBUG("IH: HPD5\n");
5747 				}
5748 				break;
5749 			case 5:
5750 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5751 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5752 					queue_hotplug = true;
5753 					DRM_DEBUG("IH: HPD6\n");
5754 				}
5755 				break;
5756 			default:
5757 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5758 				break;
5759 			}
5760 			break;
5761 		case 146:
5762 		case 147:
5763 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
5764 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
5765 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
5766 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5767 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5768 				addr);
5769 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5770 				status);
5771 			cik_vm_decode_fault(rdev, status, addr, mc_client);
5772 			/* reset addr and status */
5773 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5774 			break;
5775 		case 176: /* GFX RB CP_INT */
5776 		case 177: /* GFX IB CP_INT */
5777 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5778 			break;
5779 		case 181: /* CP EOP event */
5780 			DRM_DEBUG("IH: CP EOP\n");
5781 			/* XXX check the bitfield order! */
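			/* the masks below assume RINGID is packed as ME_ID in
			 * bits [6:5], PIPE_ID in bits [4:3] and QUEUE_ID in
			 * bits [2:0], per the IV ring layout comment above */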
5782 			me_id = (ring_id & 0x60) >> 5;
5783 			pipe_id = (ring_id & 0x18) >> 3;
5784 			queue_id = (ring_id & 0x7) >> 0;
5785 			switch (me_id) {
5786 			case 0:
5787 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5788 				break;
5789 			case 1:
5790 			case 2:
5791 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
5792 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5793 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5794 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5795 				break;
5796 			}
5797 			break;
5798 		case 184: /* CP Privileged reg access */
5799 			DRM_ERROR("Illegal register access in command stream\n");
5800 			/* XXX check the bitfield order! */
5801 			me_id = (ring_id & 0x60) >> 5;
5802 			pipe_id = (ring_id & 0x18) >> 3;
5803 			queue_id = (ring_id & 0x7) >> 0;
5804 			switch (me_id) {
5805 			case 0:
5806 				/* This results in a full GPU reset, but all we need to do is soft
5807 				 * reset the CP for gfx
5808 				 */
5809 				queue_reset = true;
5810 				break;
5811 			case 1:
5812 				/* XXX compute */
5813 				queue_reset = true;
5814 				break;
5815 			case 2:
5816 				/* XXX compute */
5817 				queue_reset = true;
5818 				break;
5819 			}
5820 			break;
5821 		case 185: /* CP Privileged inst */
5822 			DRM_ERROR("Illegal instruction in command stream\n");
5823 			/* XXX check the bitfield order! */
5824 			me_id = (ring_id & 0x60) >> 5;
5825 			pipe_id = (ring_id & 0x18) >> 3;
5826 			queue_id = (ring_id & 0x7) >> 0;
5827 			switch (me_id) {
5828 			case 0:
5829 				/* This results in a full GPU reset, but all we need to do is soft
5830 				 * reset the CP for gfx
5831 				 */
5832 				queue_reset = true;
5833 				break;
5834 			case 1:
5835 				/* XXX compute */
5836 				queue_reset = true;
5837 				break;
5838 			case 2:
5839 				/* XXX compute */
5840 				queue_reset = true;
5841 				break;
5842 			}
5843 			break;
5844 		case 224: /* SDMA trap event */
5845 			/* XXX check the bitfield order! */
5846 			me_id = (ring_id & 0x3) >> 0;
5847 			queue_id = (ring_id & 0xc) >> 2;
5848 			DRM_DEBUG("IH: SDMA trap\n");
5849 			switch (me_id) {
5850 			case 0:
5851 				switch (queue_id) {
5852 				case 0:
5853 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5854 					break;
5855 				case 1:
5856 					/* XXX compute */
5857 					break;
5858 				case 2:
5859 					/* XXX compute */
5860 					break;
5861 				}
5862 				break;
5863 			case 1:
5864 				switch (queue_id) {
5865 				case 0:
5866 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5867 					break;
5868 				case 1:
5869 					/* XXX compute */
5870 					break;
5871 				case 2:
5872 					/* XXX compute */
5873 					break;
5874 				}
5875 				break;
5876 			}
5877 			break;
5878 		case 241: /* SDMA Privileged inst */
5879 		case 247: /* SDMA Privileged inst */
5880 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
5881 			/* XXX check the bitfield order! */
5882 			me_id = (ring_id & 0x3) >> 0;
5883 			queue_id = (ring_id & 0xc) >> 2;
5884 			switch (me_id) {
5885 			case 0:
5886 				switch (queue_id) {
5887 				case 0:
5888 					queue_reset = true;
5889 					break;
5890 				case 1:
5891 					/* XXX compute */
5892 					queue_reset = true;
5893 					break;
5894 				case 2:
5895 					/* XXX compute */
5896 					queue_reset = true;
5897 					break;
5898 				}
5899 				break;
5900 			case 1:
5901 				switch (queue_id) {
5902 				case 0:
5903 					queue_reset = true;
5904 					break;
5905 				case 1:
5906 					/* XXX compute */
5907 					queue_reset = true;
5908 					break;
5909 				case 2:
5910 					/* XXX compute */
5911 					queue_reset = true;
5912 					break;
5913 				}
5914 				break;
5915 			}
5916 			break;
5917 		case 233: /* GUI IDLE */
5918 			DRM_DEBUG("IH: GUI idle\n");
5919 			break;
5920 		default:
5921 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5922 			break;
5923 		}
5924 
5925 		/* wptr/rptr are in bytes! */
5926 		rptr += 16;
5927 		rptr &= rdev->ih.ptr_mask;
5928 	}
5929 	if (queue_hotplug)
5930 		taskqueue_enqueue(rdev->tq, &rdev->hotplug_work);
5931 	if (queue_reset)
5932 		taskqueue_enqueue(rdev->tq, &rdev->reset_work);
5933 	rdev->ih.rptr = rptr;
5934 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
5935 	atomic_set(&rdev->ih.lock, 0);
5936 
5937 	/* make sure wptr hasn't changed while processing */
5938 	wptr = cik_get_ih_wptr(rdev);
5939 	if (wptr != rptr)
5940 		goto restart_ih;
5941 
5942 	return IRQ_HANDLED;
5943 }
5944 
5945 /*
5946  * startup/shutdown callbacks
5947  */
5948 /**
5949  * cik_startup - program the asic to a functional state
5950  *
5951  * @rdev: radeon_device pointer
5952  *
5953  * Programs the asic to a functional state (CIK).
5954  * Called by cik_init() and cik_resume().
5955  * Returns 0 for success, error for failure.
5956  */
5957 static int cik_startup(struct radeon_device *rdev)
5958 {
5959 	struct radeon_ring *ring;
5960 	int r;
5961 
5962 	cik_mc_program(rdev);
5963 
5964 	if (rdev->flags & RADEON_IS_IGP) {
5965 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5966 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5967 			r = cik_init_microcode(rdev);
5968 			if (r) {
5969 				DRM_ERROR("Failed to load firmware!\n");
5970 				return r;
5971 			}
5972 		}
5973 	} else {
5974 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5975 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5976 		    !rdev->mc_fw) {
5977 			r = cik_init_microcode(rdev);
5978 			if (r) {
5979 				DRM_ERROR("Failed to load firmware!\n");
5980 				return r;
5981 			}
5982 		}
5983 
5984 		r = ci_mc_load_microcode(rdev);
5985 		if (r) {
5986 			DRM_ERROR("Failed to load MC firmware!\n");
5987 			return r;
5988 		}
5989 	}
5990 
5991 	r = r600_vram_scratch_init(rdev);
5992 	if (r)
5993 		return r;
5994 
5995 	r = cik_pcie_gart_enable(rdev);
5996 	if (r)
5997 		return r;
5998 	cik_gpu_init(rdev);
5999 
6000 	/* allocate rlc buffers */
6001 	r = si_rlc_init(rdev);
6002 	if (r) {
6003 		DRM_ERROR("Failed to init rlc BOs!\n");
6004 		return r;
6005 	}
6006 
6007 	/* allocate wb buffer */
6008 	r = radeon_wb_init(rdev);
6009 	if (r)
6010 		return r;
6011 
6012 	/* allocate mec buffers */
6013 	r = cik_mec_init(rdev);
6014 	if (r) {
6015 		DRM_ERROR("Failed to init MEC BOs!\n");
6016 		return r;
6017 	}
6018 
6019 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6020 	if (r) {
6021 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6022 		return r;
6023 	}
6024 
6025 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6026 	if (r) {
6027 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6028 		return r;
6029 	}
6030 
6031 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6032 	if (r) {
6033 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6034 		return r;
6035 	}
6036 
6037 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6038 	if (r) {
6039 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6040 		return r;
6041 	}
6042 
6043 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6044 	if (r) {
6045 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6046 		return r;
6047 	}
6048 
6049 	r = cik_uvd_resume(rdev);
6050 	if (!r) {
6051 		r = radeon_fence_driver_start_ring(rdev,
6052 						   R600_RING_TYPE_UVD_INDEX);
6053 		if (r)
6054 			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6055 	}
6056 	if (r)
6057 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6058 
6059 	/* Enable IRQ */
6060 	if (!rdev->irq.installed) {
6061 		r = radeon_irq_kms_init(rdev);
6062 		if (r)
6063 			return r;
6064 	}
6065 
6066 	r = cik_irq_init(rdev);
6067 	if (r) {
6068 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
6069 		radeon_irq_kms_fini(rdev);
6070 		return r;
6071 	}
6072 	cik_irq_set(rdev);
6073 
6074 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6075 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6076 			     CP_RB0_RPTR, CP_RB0_WPTR,
6077 			     0, 0xfffff, RADEON_CP_PACKET2);
6078 	if (r)
6079 		return r;
6080 
6081 	/* set up the compute queues */
6082 	/* type-2 packets are deprecated on MEC, use type-3 instead */
6083 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6084 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6085 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6086 			     0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
6087 	if (r)
6088 		return r;
6089 	ring->me = 1; /* first MEC */
6090 	ring->pipe = 0; /* first pipe */
6091 	ring->queue = 0; /* first queue */
6092 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6093 
6094 	/* type-2 packets are deprecated on MEC, use type-3 instead */
6095 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6096 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6097 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6098 			     0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
6099 	if (r)
6100 		return r;
6101 	/* dGPUs only have 1 MEC */
6102 	ring->me = 1; /* first MEC */
6103 	ring->pipe = 0; /* first pipe */
6104 	ring->queue = 1; /* second queue */
6105 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6106 
6107 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6108 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6109 			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6110 			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6111 			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6112 	if (r)
6113 		return r;
6114 
6115 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6116 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6117 			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6118 			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6119 			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6120 	if (r)
6121 		return r;
6122 
6123 	r = cik_cp_resume(rdev);
6124 	if (r)
6125 		return r;
6126 
6127 	r = cik_sdma_resume(rdev);
6128 	if (r)
6129 		return r;
6130 
6131 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6132 	if (ring->ring_size) {
6133 		r = radeon_ring_init(rdev, ring, ring->ring_size,
6134 				     R600_WB_UVD_RPTR_OFFSET,
6135 				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6136 				     0, 0xfffff, RADEON_CP_PACKET2);
6137 		if (!r)
6138 			r = r600_uvd_init(rdev);
6139 		if (r)
6140 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6141 	}
6142 
6143 	r = radeon_ib_pool_init(rdev);
6144 	if (r) {
6145 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6146 		return r;
6147 	}
6148 
6149 	r = radeon_vm_manager_init(rdev);
6150 	if (r) {
6151 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6152 		return r;
6153 	}
6154 
6155 	return 0;
6156 }
6157 
6158 /**
6159  * cik_resume - resume the asic to a functional state
6160  *
6161  * @rdev: radeon_device pointer
6162  *
6163  * Programs the asic to a functional state (CIK).
6164  * Called at resume.
6165  * Returns 0 for success, error for failure.
6166  */
6167 int cik_resume(struct radeon_device *rdev)
6168 {
6169 	int r;
6170 
6171 	/* post card */
6172 	atom_asic_init(rdev->mode_info.atom_context);
6173 
6174 	/* init golden registers */
6175 	cik_init_golden_registers(rdev);
6176 
6177 	rdev->accel_working = true;
6178 	r = cik_startup(rdev);
6179 	if (r) {
6180 		DRM_ERROR("cik startup failed on resume\n");
6181 		rdev->accel_working = false;
6182 		return r;
6183 	}
6184 
6185 	return r;
6186 
6187 }
6188 
6189 /**
6190  * cik_suspend - suspend the asic
6191  *
6192  * @rdev: radeon_device pointer
6193  *
6194  * Bring the chip into a state suitable for suspend (CIK).
6195  * Called at suspend.
6196  * Returns 0 for success.
6197  */
6198 int cik_suspend(struct radeon_device *rdev)
6199 {
6200 	radeon_vm_manager_fini(rdev);
6201 	cik_cp_enable(rdev, false);
6202 	cik_sdma_enable(rdev, false);
6203 	r600_uvd_stop(rdev);
6204 	radeon_uvd_suspend(rdev);
6205 	cik_irq_suspend(rdev);
6206 	radeon_wb_disable(rdev);
6207 	cik_pcie_gart_disable(rdev);
6208 	return 0;
6209 }
6210 
6211 /* The plan is to move initialization into this function and use
6212  * helper functions so that radeon_device_init does little more
6213  * than call asic specific functions. This should also
6214  * allow us to remove a bunch of callback functions
6215  * like vram_info.
6216  */
6217 /**
6218  * cik_init - asic specific driver and hw init
6219  *
6220  * @rdev: radeon_device pointer
6221  *
6222  * Setup asic specific driver variables and program the hw
6223  * to a functional state (CIK).
6224  * Called at driver startup.
6225  * Returns 0 for success, errors for failure.
6226  */
6227 int cik_init(struct radeon_device *rdev)
6228 {
6229 	struct radeon_ring *ring;
6230 	int r;
6231 
6232 	/* Read BIOS */
6233 	if (!radeon_get_bios(rdev)) {
6234 		if (ASIC_IS_AVIVO(rdev))
6235 			return -EINVAL;
6236 	}
6237 	/* Must be an ATOMBIOS */
6238 	if (!rdev->is_atom_bios) {
6239 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
6240 		return -EINVAL;
6241 	}
6242 	r = radeon_atombios_init(rdev);
6243 	if (r)
6244 		return r;
6245 
6246 	/* Post card if necessary */
6247 	if (!radeon_card_posted(rdev)) {
6248 		if (!rdev->bios) {
6249 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6250 			return -EINVAL;
6251 		}
6252 		DRM_INFO("GPU not posted. posting now...\n");
6253 		atom_asic_init(rdev->mode_info.atom_context);
6254 	}
6255 	/* init golden registers */
6256 	cik_init_golden_registers(rdev);
6257 	/* Initialize scratch registers */
6258 	cik_scratch_init(rdev);
6259 	/* Initialize surface registers */
6260 	radeon_surface_init(rdev);
6261 	/* Initialize clocks */
6262 	radeon_get_clock_info(rdev->ddev);
6263 
6264 	/* Fence driver */
6265 	r = radeon_fence_driver_init(rdev);
6266 	if (r)
6267 		return r;
6268 
6269 	/* initialize memory controller */
6270 	r = cik_mc_init(rdev);
6271 	if (r)
6272 		return r;
6273 	/* Memory manager */
6274 	r = radeon_bo_init(rdev);
6275 	if (r)
6276 		return r;
6277 
6278 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6279 	ring->ring_obj = NULL;
6280 	r600_ring_init(rdev, ring, 1024 * 1024);
6281 
6282 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6283 	ring->ring_obj = NULL;
6284 	r600_ring_init(rdev, ring, 1024 * 1024);
6285 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6286 	if (r)
6287 		return r;
6288 
6289 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6290 	ring->ring_obj = NULL;
6291 	r600_ring_init(rdev, ring, 1024 * 1024);
6292 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6293 	if (r)
6294 		return r;
6295 
6296 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6297 	ring->ring_obj = NULL;
6298 	r600_ring_init(rdev, ring, 256 * 1024);
6299 
6300 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6301 	ring->ring_obj = NULL;
6302 	r600_ring_init(rdev, ring, 256 * 1024);
6303 
6304 	r = radeon_uvd_init(rdev);
6305 	if (!r) {
6306 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6307 		ring->ring_obj = NULL;
6308 		r600_ring_init(rdev, ring, 4096);
6309 	}
6310 
6311 	rdev->ih.ring_obj = NULL;
6312 	r600_ih_ring_init(rdev, 64 * 1024);
6313 
6314 	r = r600_pcie_gart_init(rdev);
6315 	if (r)
6316 		return r;
6317 
6318 	rdev->accel_working = true;
6319 	r = cik_startup(rdev);
6320 	if (r) {
6321 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6322 		cik_cp_fini(rdev);
6323 		cik_sdma_fini(rdev);
6324 		cik_irq_fini(rdev);
6325 		si_rlc_fini(rdev);
6326 		cik_mec_fini(rdev);
6327 		radeon_wb_fini(rdev);
6328 		radeon_ib_pool_fini(rdev);
6329 		radeon_vm_manager_fini(rdev);
6330 		radeon_irq_kms_fini(rdev);
6331 		cik_pcie_gart_fini(rdev);
6332 		rdev->accel_working = false;
6333 	}
6334 
6335 	/* Don't start up if the MC ucode is missing.
6336 	 * The default clocks and voltages before the MC ucode
6337 	 * is loaded are not sufficient for advanced operations.
6338 	 */
6339 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6340 		DRM_ERROR("radeon: MC ucode required for CIK parts.\n");
6341 		return -EINVAL;
6342 	}
6343 
6344 	return 0;
6345 }
6346 
6347 /**
6348  * cik_fini - asic specific driver and hw fini
6349  *
6350  * @rdev: radeon_device pointer
6351  *
6352  * Tear down the asic specific driver variables and program the hw
6353  * to an idle state (CIK).
6354  * Called at driver unload.
6355  */
6356 void cik_fini(struct radeon_device *rdev)
6357 {
6358 	cik_cp_fini(rdev);
6359 	cik_sdma_fini(rdev);
6360 	cik_irq_fini(rdev);
6361 	si_rlc_fini(rdev);
6362 	cik_mec_fini(rdev);
6363 	radeon_wb_fini(rdev);
6364 	radeon_vm_manager_fini(rdev);
6365 	radeon_ib_pool_fini(rdev);
6366 	radeon_irq_kms_fini(rdev);
6367 	r600_uvd_stop(rdev);
6368 	radeon_uvd_fini(rdev);
6369 	cik_pcie_gart_fini(rdev);
6370 	r600_vram_scratch_fini(rdev);
6371 	radeon_gem_fini(rdev);
6372 	radeon_fence_driver_fini(rdev);
6373 	radeon_bo_fini(rdev);
6374 	radeon_atombios_fini(rdev);
6375 	kfree(rdev->bios);
6376 	rdev->bios = NULL;
6377 }
6378 
6379 /* display watermark setup */
6380 /**
6381  * dce8_line_buffer_adjust - Set up the line buffer
6382  *
6383  * @rdev: radeon_device pointer
6384  * @radeon_crtc: the selected display controller
6385  * @mode: the current display mode on the selected display
6386  * controller
6387  *
6388  * Set up the line buffer allocation for
6389  * the selected display controller (CIK).
6390  * Returns the line buffer size in pixels.
6391  */
6392 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6393 				   struct radeon_crtc *radeon_crtc,
6394 				   struct drm_display_mode *mode)
6395 {
6396 	u32 tmp;
6397 
6398 	/*
6399 	 * Line Buffer Setup
6400 	 * There are 6 line buffers, one for each display controller.
6401 	 * There are 3 partitions per LB. Select the number of partitions
6402 	 * to enable based on the display width.  For display widths larger
6403 	 * than 4096, you need to use 2 display controllers and combine
6404 	 * them using the stereo blender.
6405 	 */
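	/* For example, a 1920 pixel wide mode lands in the "< 2560" bucket
	 * below (tmp = 2), so that controller gets a 2560 * 2 pixel line
	 * buffer allocation. */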
6406 	if (radeon_crtc->base.enabled && mode) {
6407 		if (mode->crtc_hdisplay < 1920)
6408 			tmp = 1;
6409 		else if (mode->crtc_hdisplay < 2560)
6410 			tmp = 2;
6411 		else if (mode->crtc_hdisplay < 4096)
6412 			tmp = 0;
6413 		else {
6414 			DRM_DEBUG_KMS("Mode too big for LB!\n");
6415 			tmp = 0;
6416 		}
6417 	} else
6418 		tmp = 1;
6419 
6420 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6421 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6422 
6423 	if (radeon_crtc->base.enabled && mode) {
6424 		switch (tmp) {
6425 		case 0:
6426 		default:
6427 			return 4096 * 2;
6428 		case 1:
6429 			return 1920 * 2;
6430 		case 2:
6431 			return 2560 * 2;
6432 		}
6433 	}
6434 
6435 	/* controller not enabled, so no lb used */
6436 	return 0;
6437 }
6438 
6439 /**
6440  * cik_get_number_of_dram_channels - get the number of dram channels
6441  *
6442  * @rdev: radeon_device pointer
6443  *
6444  * Look up the number of video ram channels (CIK).
6445  * Used for display watermark bandwidth calculations
6446  * Returns the number of dram channels
6447  */
6448 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6449 {
6450 	u32 tmp = RREG32(MC_SHARED_CHMAP);
6451 
6452 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
6453 	case 0:
6454 	default:
6455 		return 1;
6456 	case 1:
6457 		return 2;
6458 	case 2:
6459 		return 4;
6460 	case 3:
6461 		return 8;
6462 	case 4:
6463 		return 3;
6464 	case 5:
6465 		return 6;
6466 	case 6:
6467 		return 10;
6468 	case 7:
6469 		return 12;
6470 	case 8:
6471 		return 16;
6472 	}
6473 }
6474 
6475 struct dce8_wm_params {
6476 	u32 dram_channels; /* number of dram channels */
6477 	u32 yclk;          /* bandwidth per dram data pin in kHz */
6478 	u32 sclk;          /* engine clock in kHz */
6479 	u32 disp_clk;      /* display clock in kHz */
6480 	u32 src_width;     /* viewport width */
6481 	u32 active_time;   /* active display time in ns */
6482 	u32 blank_time;    /* blank time in ns */
6483 	bool interlaced;    /* mode is interlaced */
6484 	fixed20_12 vsc;    /* vertical scale ratio */
6485 	u32 num_heads;     /* number of active crtcs */
6486 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6487 	u32 lb_size;       /* line buffer allocated to pipe */
6488 	u32 vtaps;         /* vertical scaler taps */
6489 };
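/* The dce8_* watermark helpers below do most of their math in radeon's
 * 20.12 fixed point format (fixed20_12, via the dfixed_*() helpers). */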
6490 
6491 /**
6492  * dce8_dram_bandwidth - get the dram bandwidth
6493  *
6494  * @wm: watermark calculation data
6495  *
6496  * Calculate the raw dram bandwidth (CIK).
6497  * Used for display watermark bandwidth calculations
6498  * Returns the dram bandwidth in MBytes/s
6499  */
6500 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6501 {
6502 	/* Calculate raw DRAM Bandwidth */
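	/* Illustrative numbers (not from the hw docs): with yclk = 1000000
	 * (a 1 GHz data pin clock) and 4 channels this evaluates to
	 * (1000000 / 1000) * (4 * 4) * 0.7 = 11200 MBytes/s. */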
6503 	fixed20_12 dram_efficiency; /* 0.7 */
6504 	fixed20_12 yclk, dram_channels, bandwidth;
6505 	fixed20_12 a;
6506 
6507 	a.full = dfixed_const(1000);
6508 	yclk.full = dfixed_const(wm->yclk);
6509 	yclk.full = dfixed_div(yclk, a);
6510 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
6511 	a.full = dfixed_const(10);
6512 	dram_efficiency.full = dfixed_const(7);
6513 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
6514 	bandwidth.full = dfixed_mul(dram_channels, yclk);
6515 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6516 
6517 	return dfixed_trunc(bandwidth);
6518 }
6519 
6520 /**
6521  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6522  *
6523  * @wm: watermark calculation data
6524  *
6525  * Calculate the dram bandwidth used for display (CIK).
6526  * Used for display watermark bandwidth calculations
6527  * Returns the dram bandwidth for display in MBytes/s
6528  */
6529 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6530 {
6531 	/* Calculate DRAM Bandwidth and the part allocated to display. */
6532 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6533 	fixed20_12 yclk, dram_channels, bandwidth;
6534 	fixed20_12 a;
6535 
6536 	a.full = dfixed_const(1000);
6537 	yclk.full = dfixed_const(wm->yclk);
6538 	yclk.full = dfixed_div(yclk, a);
6539 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
6540 	a.full = dfixed_const(10);
6541 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
6542 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6543 	bandwidth.full = dfixed_mul(dram_channels, yclk);
6544 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6545 
6546 	return dfixed_trunc(bandwidth);
6547 }
6548 
6549 /**
6550  * dce8_data_return_bandwidth - get the data return bandwidth
6551  *
6552  * @wm: watermark calculation data
6553  *
6554  * Calculate the data return bandwidth used for display (CIK).
6555  * Used for display watermark bandwidth calculations
6556  * Returns the data return bandwidth in MBytes/s
6557  */
6558 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6559 {
6560 	/* Calculate the display Data return Bandwidth */
6561 	fixed20_12 return_efficiency; /* 0.8 */
6562 	fixed20_12 sclk, bandwidth;
6563 	fixed20_12 a;
6564 
6565 	a.full = dfixed_const(1000);
6566 	sclk.full = dfixed_const(wm->sclk);
6567 	sclk.full = dfixed_div(sclk, a);
6568 	a.full = dfixed_const(10);
6569 	return_efficiency.full = dfixed_const(8);
6570 	return_efficiency.full = dfixed_div(return_efficiency, a);
6571 	a.full = dfixed_const(32);
6572 	bandwidth.full = dfixed_mul(a, sclk);
6573 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6574 
6575 	return dfixed_trunc(bandwidth);
6576 }
6577 
6578 /**
6579  * dce8_dmif_request_bandwidth - get the dmif bandwidth
6580  *
6581  * @wm: watermark calculation data
6582  *
6583  * Calculate the dmif bandwidth used for display (CIK).
6584  * Used for display watermark bandwidth calculations
6585  * Returns the dmif bandwidth in MBytes/s
6586  */
6587 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6588 {
6589 	/* Calculate the DMIF Request Bandwidth */
6590 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6591 	fixed20_12 disp_clk, bandwidth;
6592 	fixed20_12 a, b;
6593 
6594 	a.full = dfixed_const(1000);
6595 	disp_clk.full = dfixed_const(wm->disp_clk);
6596 	disp_clk.full = dfixed_div(disp_clk, a);
6597 	a.full = dfixed_const(32);
6598 	b.full = dfixed_mul(a, disp_clk);
6599 
6600 	a.full = dfixed_const(10);
6601 	disp_clk_request_efficiency.full = dfixed_const(8);
6602 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6603 
6604 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6605 
6606 	return dfixed_trunc(bandwidth);
6607 }
6608 
6609 /**
6610  * dce8_available_bandwidth - get the min available bandwidth
6611  *
6612  * @wm: watermark calculation data
6613  *
6614  * Calculate the min available bandwidth used for display (CIK).
6615  * Used for display watermark bandwidth calculations
6616  * Returns the min available bandwidth in MBytes/s
6617  */
6618 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6619 {
6620 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
6621 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6622 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6623 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6624 
6625 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6626 }
6627 
6628 /**
6629  * dce8_average_bandwidth - get the average bandwidth consumed by the mode
6630  *
6631  * @wm: watermark calculation data
6632  *
6633  * Calculate the average bandwidth consumed by the current display mode (CIK).
6634  * Used for display watermark bandwidth calculations
6635  * Returns the average bandwidth in MBytes/s
6636  */
6637 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6638 {
6639 	/* Calculate the display mode Average Bandwidth
6640 	 * DisplayMode should contain the source and destination dimensions,
6641 	 * timing, etc.
6642 	 */
6643 	fixed20_12 bpp;
6644 	fixed20_12 line_time;
6645 	fixed20_12 src_width;
6646 	fixed20_12 bandwidth;
6647 	fixed20_12 a;
6648 
6649 	a.full = dfixed_const(1000);
6650 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6651 	line_time.full = dfixed_div(line_time, a);
6652 	bpp.full = dfixed_const(wm->bytes_per_pixel);
6653 	src_width.full = dfixed_const(wm->src_width);
6654 	bandwidth.full = dfixed_mul(src_width, bpp);
6655 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6656 	bandwidth.full = dfixed_div(bandwidth, line_time);
6657 
6658 	return dfixed_trunc(bandwidth);
6659 }
6660 
6661 /**
6662  * dce8_latency_watermark - get the latency watermark
6663  *
6664  * @wm: watermark calculation data
6665  *
6666  * Calculate the latency watermark (CIK).
6667  * Used for display watermark bandwidth calculations
6668  * Returns the latency watermark in ns
6669  */
6670 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6671 {
6672 	/* First calculate the latency in ns */
6673 	u32 mc_latency = 2000; /* 2000 ns. */
6674 	u32 available_bandwidth = dce8_available_bandwidth(wm);
6675 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6676 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6677 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6678 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6679 		(wm->num_heads * cursor_line_pair_return_time);
6680 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6681 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6682 	u32 tmp, dmif_size = 12288;
6683 	fixed20_12 a, b, c;
6684 
6685 	if (wm->num_heads == 0)
6686 		return 0;
6687 
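	/* downscaling (vsc > 1) or a large number of vertical taps means the
	 * line buffer has to hold more source lines per destination line.
	 */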
6688 	a.full = dfixed_const(2);
6689 	b.full = dfixed_const(1);
6690 	if ((wm->vsc.full > a.full) ||
6691 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6692 	    (wm->vtaps >= 5) ||
6693 	    ((wm->vsc.full >= a.full) && wm->interlaced))
6694 		max_src_lines_per_dst_line = 4;
6695 	else
6696 		max_src_lines_per_dst_line = 2;
6697 
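	/* take the smaller of this head's share of the available bandwidth
	 * and (roughly) the rate at which the DMIF buffer can be refilled
	 * over the memory latency.
	 */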
6698 	a.full = dfixed_const(available_bandwidth);
6699 	b.full = dfixed_const(wm->num_heads);
6700 	a.full = dfixed_div(a, b);
6701 
6702 	b.full = dfixed_const(mc_latency + 512);
6703 	c.full = dfixed_const(wm->disp_clk);
6704 	b.full = dfixed_div(b, c);
6705 
6706 	c.full = dfixed_const(dmif_size);
6707 	b.full = dfixed_div(c, b);
6708 
6709 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6710 
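	/* the line buffer also cannot fill faster than one pixel per display
	 * clock, i.e. disp_clk (MHz) * bytes_per_pixel MB/s.
	 */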
6711 	b.full = dfixed_const(1000);
6712 	c.full = dfixed_const(wm->disp_clk);
6713 	b.full = dfixed_div(c, b);
6714 	c.full = dfixed_const(wm->bytes_per_pixel);
6715 	b.full = dfixed_mul(b, c);
6716 
6717 	lb_fill_bw = min(tmp, dfixed_trunc(b));
6718 
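	/* time in ns to fetch max_src_lines_per_dst_line source lines at
	 * lb_fill_bw; lb_fill_bw is in MB/s, so dividing by 1000 converts it
	 * to bytes per ns.
	 */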
6719 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6720 	b.full = dfixed_const(1000);
6721 	c.full = dfixed_const(lb_fill_bw);
6722 	b.full = dfixed_div(c, b);
6723 	a.full = dfixed_div(a, b);
6724 	line_fill_time = dfixed_trunc(a);
6725 
6726 	if (line_fill_time < wm->active_time)
6727 		return latency;
6728 	else
6729 		return latency + (line_fill_time - wm->active_time);
6730 
6731 }
6732 
6733 /**
6734  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6735  * average and available dram bandwidth
6736  *
6737  * @wm: watermark calculation data
6738  *
6739  * Check if the display average bandwidth fits in the display
6740  * dram bandwidth (CIK).
6741  * Used for display watermark bandwidth calculations
6742  * Returns true if the display fits, false if not.
6743  */
6744 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6745 {
6746 	if (dce8_average_bandwidth(wm) <=
6747 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6748 		return true;
6749 	else
6750 		return false;
6751 }
6752 
6753 /**
6754  * dce8_average_bandwidth_vs_available_bandwidth - check
6755  * average and available bandwidth
6756  *
6757  * @wm: watermark calculation data
6758  *
6759  * Check if the display average bandwidth fits in the display
6760  * available bandwidth (CIK).
6761  * Used for display watermark bandwidth calculations
6762  * Returns true if the display fits, false if not.
6763  */
6764 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6765 {
6766 	if (dce8_average_bandwidth(wm) <=
6767 	    (dce8_available_bandwidth(wm) / wm->num_heads))
6768 		return true;
6769 	else
6770 		return false;
6771 }
6772 
6773 /**
6774  * dce8_check_latency_hiding - check latency hiding
6775  *
6776  * @wm: watermark calculation data
6777  *
6778  * Check latency hiding (CIK).
6779  * Used for display watermark bandwidth calculations
6780  * Returns true if the display fits, false if not.
6781  */
6782 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6783 {
6784 	u32 lb_partitions = wm->lb_size / wm->src_width;
6785 	u32 line_time = wm->active_time + wm->blank_time;
6786 	u32 latency_tolerant_lines;
6787 	u32 latency_hiding;
6788 	fixed20_12 a;
6789 
6790 	a.full = dfixed_const(1);
6791 	if (wm->vsc.full > a.full)
6792 		latency_tolerant_lines = 1;
6793 	else {
6794 		if (lb_partitions <= (wm->vtaps + 1))
6795 			latency_tolerant_lines = 1;
6796 		else
6797 			latency_tolerant_lines = 2;
6798 	}
6799 
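	/* the display can tolerate latency for as long as the latency
	 * tolerant lines plus the blanking interval last.
	 */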
6800 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6801 
6802 	if (dce8_latency_watermark(wm) <= latency_hiding)
6803 		return true;
6804 	else
6805 		return false;
6806 }
6807 
6808 /**
6809  * dce8_program_watermarks - program display watermarks
6810  *
6811  * @rdev: radeon_device pointer
6812  * @radeon_crtc: the selected display controller
6813  * @lb_size: line buffer size
6814  * @num_heads: number of display controllers in use
6815  *
6816  * Calculate and program the display watermarks for the
6817  * selected display controller (CIK).
6818  */
6819 static void dce8_program_watermarks(struct radeon_device *rdev,
6820 				    struct radeon_crtc *radeon_crtc,
6821 				    u32 lb_size, u32 num_heads)
6822 {
6823 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
6824 	struct dce8_wm_params wm;
6825 	u32 pixel_period;
6826 	u32 line_time = 0;
6827 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
6828 	u32 tmp, wm_mask;
6829 
6830 	if (radeon_crtc->base.enabled && num_heads && mode) {
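		/* mode->clock is in kHz, so pixel_period comes out in ns */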
6831 		pixel_period = 1000000 / (u32)mode->clock;
6832 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6833 
6834 		wm.yclk = rdev->pm.current_mclk * 10;
6835 		wm.sclk = rdev->pm.current_sclk * 10;
6836 		wm.disp_clk = mode->clock;
6837 		wm.src_width = mode->crtc_hdisplay;
6838 		wm.active_time = mode->crtc_hdisplay * pixel_period;
6839 		wm.blank_time = line_time - wm.active_time;
6840 		wm.interlaced = false;
6841 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6842 			wm.interlaced = true;
6843 		wm.vsc = radeon_crtc->vsc;
6844 		wm.vtaps = 1;
6845 		if (radeon_crtc->rmx_type != RMX_OFF)
6846 			wm.vtaps = 2;
6847 		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6848 		wm.lb_size = lb_size;
6849 		wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6850 		wm.num_heads = num_heads;
6851 
6852 		/* set for high clocks */
6853 		latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6854 		/* set for low clocks */
6855 		/* wm.yclk = low clk; wm.sclk = low clk */
6856 		latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
6857 
6858 		/* possibly force display priority to high */
6859 		/* should really do this at mode validation time... */
6860 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6861 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6862 		    !dce8_check_latency_hiding(&wm) ||
6863 		    (rdev->disp_priority == 2)) {
6864 			DRM_DEBUG_KMS("force priority to high\n");
6865 		}
6866 	}
6867 
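	/* the watermark mask selects which of the two latency watermark sets
	 * the DPG_PIPE_LATENCY_CONTROL writes below land in.
	 */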
6868 	/* select wm A */
6869 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6870 	tmp = wm_mask;
6871 	tmp &= ~LATENCY_WATERMARK_MASK(3);
6872 	tmp |= LATENCY_WATERMARK_MASK(1);
6873 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6874 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6875 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6876 		LATENCY_HIGH_WATERMARK(line_time)));
6877 	/* select wm B */
6878 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6879 	tmp &= ~LATENCY_WATERMARK_MASK(3);
6880 	tmp |= LATENCY_WATERMARK_MASK(2);
6881 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6882 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6883 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6884 		LATENCY_HIGH_WATERMARK(line_time)));
6885 	/* restore original selection */
6886 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6887 }
6888 
6889 /**
6890  * dce8_bandwidth_update - program display watermarks
6891  *
6892  * @rdev: radeon_device pointer
6893  *
6894  * Calculate and program the display watermarks and line
6895  * buffer allocation (CIK).
6896  */
6897 void dce8_bandwidth_update(struct radeon_device *rdev)
6898 {
6899 	struct drm_display_mode *mode = NULL;
6900 	u32 num_heads = 0, lb_size;
6901 	int i;
6902 
6903 	radeon_update_display_priority(rdev);
6904 
6905 	for (i = 0; i < rdev->num_crtc; i++) {
6906 		if (rdev->mode_info.crtcs[i]->base.enabled)
6907 			num_heads++;
6908 	}
6909 	for (i = 0; i < rdev->num_crtc; i++) {
6910 		mode = &rdev->mode_info.crtcs[i]->base.mode;
6911 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6912 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6913 	}
6914 }
6915 
6916 /**
6917  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6918  *
6919  * @rdev: radeon_device pointer
6920  *
6921  * Fetches a GPU clock counter snapshot (CIK).
6922  * Returns the 64 bit clock counter snapshot.
6923  */
6924 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6925 {
6926 	uint64_t clock;
6927 
6928 	spin_lock(&rdev->gpu_clock_mutex);
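	/* writing the capture register latches the 64-bit counter so that the
	 * LSB and MSB reads below are consistent with each other.
	 */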
6929 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6930 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6931 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6932 	spin_unlock(&rdev->gpu_clock_mutex);
6933 	return clock;
6934 }
6935 
6936 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6937                               u32 cntl_reg, u32 status_reg)
6938 {
6939 	int r, i;
6940 	struct atom_clock_dividers dividers;
6941 	uint32_t tmp;
6942 
6943 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6944 					   clock, false, &dividers);
6945 	if (r)
6946 		return r;
6947 
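	/* program the post divider computed from the atom clock dividers */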
6948 	tmp = RREG32_SMC(cntl_reg);
6949 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6950 	tmp |= dividers.post_divider;
6951 	WREG32_SMC(cntl_reg, tmp);
6952 
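	/* poll for up to a second (100 * 10ms) for DCLK_STATUS to be set */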
6953 	for (i = 0; i < 100; i++) {
6954 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
6955 			break;
6956 		mdelay(10);
6957 	}
6958 	if (i == 100)
6959 		return -ETIMEDOUT;
6960 
6961 	return 0;
6962 }
6963 
6964 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6965 {
6966 	int r = 0;
6967 
6968 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6969 	if (r)
6970 		return r;
6971 
6972 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6973 	return r;
6974 }
6975 
6976 int cik_uvd_resume(struct radeon_device *rdev)
6977 {
6978 	uint64_t addr;
6979 	uint32_t size;
6980 	int r;
6981 
6982 	r = radeon_uvd_resume(rdev);
6983 	if (r)
6984 		return r;
6985 
6986 	/* program the VCPU memory controller bits 0-27 */
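	/* the firmware image, stack and heap are mapped back to back into
	 * the three VCPU cache windows (offsets and sizes are in 8-byte units).
	 */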
6987 	addr = rdev->uvd.gpu_addr >> 3;
6988 	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->datasize + 4) >> 3;
6989 	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6990 	WREG32(UVD_VCPU_CACHE_SIZE0, size);
6991 
6992 	addr += size;
6993 	size = RADEON_UVD_STACK_SIZE >> 3;
6994 	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
6995 	WREG32(UVD_VCPU_CACHE_SIZE1, size);
6996 
6997 	addr += size;
6998 	size = RADEON_UVD_HEAP_SIZE >> 3;
6999 	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
7000 	WREG32(UVD_VCPU_CACHE_SIZE2, size);
7001 
7002 	/* bits 28-31 */
7003 	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
7004 	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
7005 
7006 	/* bits 32-39 */
7007 	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
7008 	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
7009 
7010 	return 0;
7011 }
7012